sandbox338
/

ENSEMBLE-OBJECT-DETECTION

Model card Files Files and versions

xet

Community

sandbox338 committed on Jun 16, 2025

Commit

8f8c75e

verified ·

1 Parent(s): 6e13a93

Upload model_architecture.py with huggingface_hub

Browse files

Files changed (1) hide show

model_architecture.py +155 -0

model_architecture.py ADDED Viewed

	@@ -0,0 +1,155 @@

+# Essential code to recreate model architecture
+import torch
+import torch.nn as nn
+import torchvision.models as models
+from detectron2.modeling.backbone import Backbone, BACKBONE_REGISTRY
+from detectron2.layers import ShapeSpec
+class WildlifeInceptionBackbone(nn.Module):
+    def __init__(self):
+        super(WildlifeInceptionBackbone, self).__init__()
+        inception = models.inception_v3(weights=models.Inception_V3_Weights.IMAGENET1K_V1, aux_logits=True)
+        inception.eval()
+        self.Conv2d_1a_3x3 = inception.Conv2d_1a_3x3
+        self.Conv2d_2a_3x3 = inception.Conv2d_2a_3x3
+        self.Conv2d_2b_3x3 = inception.Conv2d_2b_3x3
+        self.maxpool1 = inception.maxpool1
+        self.Conv2d_3b_1x1 = inception.Conv2d_3b_1x1
+        self.Conv2d_4a_3x3 = inception.Conv2d_4a_3x3
+        self.maxpool2 = inception.maxpool2
+        self.Mixed_5b = inception.Mixed_5b
+        self.Mixed_5c = inception.Mixed_5c
+        self.Mixed_5d = inception.Mixed_5d
+        self.Mixed_6a = inception.Mixed_6a
+        self.Mixed_6b = inception.Mixed_6b
+        self.Mixed_6c = inception.Mixed_6c
+        self.Mixed_6d = inception.Mixed_6d
+        self.Mixed_6e = inception.Mixed_6e
+        self.Mixed_7a = inception.Mixed_7a
+        self.Mixed_7b = inception.Mixed_7b
+        self.Mixed_7c = inception.Mixed_7c
+        self.level4_enhance = nn.Sequential(
+            nn.Conv2d(768, 256, 3, padding=1, bias=False),
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(256, 256, 1, bias=False),
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True)
+        )
+        self.level5_enhance = nn.Sequential(
+            nn.Conv2d(2048, 256, 3, padding=1, bias=False),
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(256, 256, 1, bias=False),
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True)
+        )
+        self._init_weights()
+    def _init_weights(self):
+        for m in [self.level4_enhance, self.level5_enhance]:
+            for layer in m.modules():
+                if isinstance(layer, nn.Conv2d):
+                    nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
+                elif isinstance(layer, nn.BatchNorm2d):
+                    nn.init.constant_(layer.weight, 1)
+                    nn.init.constant_(layer.bias, 0)
+    def forward(self, x):
+        x = self.Conv2d_1a_3x3(x)
+        x = self.Conv2d_2a_3x3(x)
+        x = self.Conv2d_2b_3x3(x)
+        x = self.maxpool1(x)
+        x = self.Conv2d_3b_1x1(x)
+        x = self.Conv2d_4a_3x3(x)
+        x = self.maxpool2(x)
+        x = self.Mixed_5b(x)
+        x = self.Mixed_5c(x)
+        x = self.Mixed_5d(x)
+        x = self.Mixed_6a(x)
+        x = self.Mixed_6b(x)
+        x = self.Mixed_6c(x)
+        x = self.Mixed_6d(x)
+        level4_raw = self.Mixed_6e(x)
+        level4_features = self.level4_enhance(level4_raw)
+        x = self.Mixed_7a(level4_raw)
+        x = self.Mixed_7b(x)
+        level5_raw = self.Mixed_7c(x)
+        level5_features = self.level5_enhance(level5_raw)
+        return {
+            "res4": level4_features,
+            "res5": level5_features
+        }
+class EnhancedResNetBackbone(nn.Module):
+    def __init__(self):
+        super(EnhancedResNetBackbone, self).__init__()
+        resnet = models.resnet101(weights=models.ResNet101_Weights.IMAGENET1K_V2)
+        self.conv1 = resnet.conv1
+        self.bn1 = resnet.bn1
+        self.relu = resnet.relu
+        self.maxpool = resnet.maxpool
+        self.layer1 = resnet.layer1
+        self.layer2 = resnet.layer2
+        self.layer3 = resnet.layer3
+        self.layer4 = resnet.layer4
+        self.enhance_res4 = nn.Sequential(
+            nn.Conv2d(1024, 256, 3, padding=1, bias=False),
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(256, 256, 1, bias=False),
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True)
+        )
+        self.enhance_res5 = nn.Sequential(
+            nn.Conv2d(2048, 256, 3, padding=1, bias=False),
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(256, 256, 1, bias=False),
+            nn.BatchNorm2d(256),
+            nn.ReLU(inplace=True)
+        )
+        self._init_weights()
+    def _init_weights(self):
+        for m in [self.enhance_res4, self.enhance_res5]:
+            for layer in m.modules():
+                if isinstance(layer, nn.Conv2d):
+                    nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
+                elif isinstance(layer, nn.BatchNorm2d):
+                    nn.init.constant_(layer.weight, 1)
+                    nn.init.constant_(layer.bias, 0)
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        res4_raw = self.layer3(x)
+        res4_enhanced = self.enhance_res4(res4_raw)
+        res5_raw = self.layer4(res4_raw)
+        res5_enhanced = self.enhance_res5(res5_raw)
+        return {
+            "res4": res4_enhanced,
+            "res5": res5_enhanced
+        }