FinalVision

Paused

App Files Community

VisionLanguageGroup committed on Nov 16, 2025

Commit

bc63015

1 Parent(s): d3a435a

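fix bug (cast uint16 model output to int16 before torch.from_numpy; remove several Counting_with_SD_features_* and adapter classes from models/model.py)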

Browse files

Files changed (1) hide show

models/model.py +5 -343

models/model.py CHANGED Viewed

@@ -4,6 +4,7 @@ import torch.nn.functional as F
 import os
 import clip
 import sys
 from models.seg_post_model.cellpose.models import CellposeModel
 from torchvision.ops import roi_align
@@ -53,99 +54,6 @@ class Counting_with_SD_features_track(nn.Module):
         self.adapter = adapter_roi_loca()
         self.regressor = regressor_with_SD_features_tra()
-class Counting_with_SD_features_loca_rand(nn.Module):
-    def __init__(self, scale_factor, num_of_roi = 3):
-        super(Counting_with_SD_features_loca_rand, self).__init__()
-        self.adapter = adapter_roi_loca_rand(num_of_roi=num_of_roi)
-        self.regressor = regressor_with_SD_features()
-class Counting_with_SD_features_loca_carpk(nn.Module):
-    def __init__(self, scale_factor, num_of_roi = 3):
-        super(Counting_with_SD_features_loca_carpk, self).__init__()
-        self.adapter = adapter_roi_loca_carpk(num_of_roi=num_of_roi)
-        self.regressor = regressor_with_SD_features()
-class Counting_with_SD_features_clip_carpk(nn.Module):
-    def __init__(self, scale_factor, num_of_roi = 3):
-        super(Counting_with_SD_features_clip_carpk, self).__init__()
-        self.adapter = adapter_roi_clip_carpk(num_of_roi=num_of_roi)
-        # self.regressor = regressor_with_SD_features()
-class Counting_with_SD_features_zero(nn.Module):
-    def __init__(self, scale_factor):
-        super(Counting_with_SD_features_zero, self).__init__()
-        self.adapter = adapter_roi_zero()
-        self.regressor = regressor_with_SD_features()
-class Counting_with_SD_features_zero_loca(nn.Module):
-    def __init__(self, scale_factor):
-        super(Counting_with_SD_features_zero_loca, self).__init__()
-        self.adapter = adapter_roi_zero_loca()
-        self.regressor = regressor_with_SD_features()
-class Counting_with_SD_features_zero_loca_self(nn.Module):
-    def __init__(self, scale_factor):
-        super(Counting_with_SD_features_zero_loca_self, self).__init__()
-        self.adapter = adapter_roi_zero_loca()
-        # self.regressor = regressor_with_SD_features_self()
-        self.regressor = regressor_with_SD_features_latent()
-class Counting_with_SD_features_loca_v2(nn.Module):
-    def __init__(self, scale_factor):
-        super(Counting_with_SD_features_loca_v2, self).__init__()
-        self.adapter = adapter_roi_loca_v2()
-        # self.regressor = regressor_with_SD_features()
-class adapter1(nn.Module):
-    def __init__(self):
-        super(adapter1, self).__init__()
-        self.conv1 = nn.Conv2d(256, 128, kernel_size=3, padding=1)
-        self.pool = nn.MaxPool2d(2)
-        self.fc = nn.Linear(128 * 64 * 64, 768)
-        self.initialize_weights()
-    def forward(self, x):
-        x = self.conv1(x)
-        x = self.pool(x)
-        x = x.view(x.size(0), -1)
-        x = self.fc(x)
-        return x
-    def initialize_weights(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
-                nn.init.xavier_normal_(m.weight)
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
-class adapter(nn.Module):
-    def __init__(self, pool_size=[3, 3]):
-        super(adapter, self).__init__()
-        self.pool_size = pool_size
-        self.conv1 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
-        self.pool = nn.MaxPool2d(2)
-        self.fc = nn.Linear(256 * 3 * 3, 768)
-        self.initialize_weights()
-    def forward(self, xs):
-        x_list = []
-        for x in xs:
-            x = F.adaptive_max_pool2d(x, self.pool_size, return_indices=False) # [1, 256, 3, 3]
-            x_list.append(x)
-        x_list = torch.cat(x_list, dim=0)
-        x_list = torch.mean(x_list, dim=0, keepdim=True) # [1, 256, 3, 3]
-        x = self.conv1(x_list)
-        # x = self.pool(x)
-        x = x.view(x.size(0), -1)
-        x = self.fc(x)
-        return x
-    def initialize_weights(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
-                nn.init.xavier_normal_(m.weight)
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
 class adapter_roi(nn.Module):
     def __init__(self, pool_size=[3, 3]):
@@ -279,256 +187,6 @@ class adapter_roi_loca(nn.Module):
                     nn.init.constant_(m.bias, 0)
-class adapter_roi_dino(nn.Module):
-    def __init__(self, pool_size=[3, 3]):
-        super(adapter_roi_dino, self).__init__()
-        self.pool_size = pool_size
-        # self.conv1 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
-        # self.pool = nn.MaxPool2d(2)
-        self.fc = nn.Linear(1024, 768)
-        self.initialize_weights()
-    def forward(self, crops, dino_model):
-            num_of_boxes = len(crops)
-            feats = []
-            for i in range(num_of_boxes):
-                with torch.no_grad():
-                    feat = dino_model(crops[i])
-                feats.append(feat)
-            feats = torch.cat(feats, dim=0)
-            feats = torch.mean(feats, dim=0)
-            x = self.fc(feats)
-            return x
-    def initialize_weights(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
-                nn.init.xavier_normal_(m.weight)
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
-class adapter_roi_loca_v2(nn.Module):
-    def __init__(self, pool_size=[3, 3]):
-        super(adapter_roi_loca_v2, self).__init__()
-        self.pool_size = pool_size
-        self.conv1 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
-        self.pool = nn.MaxPool2d(2)
-        self.fc = nn.Linear(256 * 3 * 3, 1024)
-        self.initialize_weights()
-    def forward(self, x, boxes):
-            rois = []
-            bs, _, h, w = x.shape
-            boxes = torch.cat([
-                torch.arange(
-                    bs, requires_grad=False
-                ).to(boxes.device).repeat_interleave(3).reshape(-1, 1),
-                boxes.flatten(0, 1),
-            ], dim=1)
-            rois = roi_align(
-                x,
-                boxes=boxes, output_size=3,
-                spatial_scale=1.0 / 8, aligned=True
-            )
-            rois = torch.mean(rois, dim=0, keepdim=True)
-            x = self.conv1(rois)
-            x = x.view(x.size(0), -1)
-            x = self.fc(x)
-            return x
-    def initialize_weights(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
-                nn.init.xavier_normal_(m.weight)
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
-class adapter_roi_zero(nn.Module):
-    def __init__(self, reduction=4):
-        super(adapter_roi_zero, self).__init__()
-        self.fc1 = nn.Sequential(
-            nn.Linear(768, 768 // reduction, bias=False),
-            nn.ReLU()
-            )
-        self.fc2 = nn.Sequential(
-            nn.Linear(768 // reduction, 768, bias=False),
-            nn.ReLU()
-            )
-        self.initialize_weights()
-    def forward(self, x):
-            x1 = self.fc1(x)
-            x1 = self.fc2(x1)
-            return x + x1
-    def initialize_weights(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
-                nn.init.xavier_normal_(m.weight)
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
-class adapter_roi_zero_loca(nn.Module):
-    def __init__(self, reduction=4):
-        super(adapter_roi_zero_loca, self).__init__()
-        self.fc1 = nn.Sequential(
-            nn.Linear(768, 768 // reduction, bias=False),
-            nn.ReLU()
-            )
-        self.fc2 = nn.Sequential(
-            nn.Linear(768 // reduction, 768, bias=False),
-            nn.ReLU()
-            )
-        self.pool_size = (3, 3)
-        self.conv1 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
-        self.pool = nn.MaxPool2d(2)
-        self.fc = nn.Linear(256 * 3 * 3, 768)
-        self.initialize_weights()
-    def forward(self, feature, boxes, class_emb):
-            x1 = self.fc1(class_emb)
-            x1 = self.fc2(x1)
-            class_emb = class_emb + x1
-            rois = []
-            bs, _, h, w = feature.shape
-            n_box = boxes.shape[1]
-            boxes = torch.cat([
-                torch.arange(
-                    bs, requires_grad=False
-                ).to(boxes.device).repeat_interleave(n_box).reshape(-1, 1),
-                boxes.flatten(0, 1),
-            ], dim=1)
-            rois = roi_align(
-                feature,
-                boxes=boxes, output_size=3,
-                spatial_scale=1.0 / 8, aligned=True
-            )
-            # rois = torch.mean(rois, dim=0, keepdim=True)
-            x = self.conv1(rois)
-            x = x.view(x.size(0), -1)
-            x = self.fc(x)
-            if len(class_emb.shape) == 3:
-                class_emb = class_emb.squeeze(1)
-            dist = torch.cosine_similarity(class_emb, x) # [n_box]
-            _, topk = torch.sort(dist[:10])
-            x_topk = x[topk[:3], :]
-            x_topk = torch.mean(x_topk, dim=0, keepdim=True)
-            return x_topk + class_emb
-    def vis(self, feature, boxes, class_emb):
-        x1 = self.fc1(class_emb)
-        x1 = self.fc2(x1)
-        class_emb = class_emb + x1
-        rois = []
-        bs, _, h, w = feature.shape
-        n_box = boxes.shape[1]
-        boxes = torch.cat([
-            torch.arange(
-                bs, requires_grad=False
-            ).to(boxes.device).repeat_interleave(n_box).reshape(-1, 1),
-            boxes.flatten(0, 1),
-        ], dim=1)
-        rois = roi_align(
-            feature,
-            boxes=boxes, output_size=3,
-            spatial_scale=1.0 / 8, aligned=True
-        )
-        # rois = torch.mean(rois, dim=0, keepdim=True)
-        x = self.conv1(rois)
-        x = x.view(x.size(0), -1)
-        x = self.fc(x)
-        if len(class_emb.shape) == 3:
-            class_emb = class_emb.squeeze(1)
-        dist = torch.cosine_similarity(class_emb, x) # [n_box]
-        _, topk = torch.sort(dist[:10])
-        x_topk = x[topk[:3], :]
-        x_topk = torch.mean(x_topk, dim=0, keepdim=True)
-        return x_topk
-    def initialize_weights(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
-                nn.init.xavier_normal_(m.weight)
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
-class adapter_roi_loca_rand(nn.Module):
-    def __init__(self, pool_size=[3, 3],num_of_roi = 3):
-        super(adapter_roi_loca_rand, self).__init__()
-        self.pool_size = pool_size
-        self.num_of_roi = num_of_roi
-        self.conv1 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
-        self.pool = nn.MaxPool2d(2)
-        self.fc = nn.Linear(256 * 3 * 3, 768)
-        # # **new
-        # self.fc1 = nn.Sequential(
-        #     nn.Linear(768, 768 // 4, bias=False),
-        #     nn.ReLU()
-        #     )
-        # self.fc2 = nn.Sequential(
-        #     nn.Linear(768 // 4, 768, bias=False),
-        #     nn.ReLU()
-        #     )
-        # #
-        self.initialize_weights()
-    def forward(self, x, boxes, rand_boxes):
-            num_of_boxes = boxes.shape[1]
-            bs, _, h, w = x.shape
-            boxes = torch.cat([
-                torch.arange(
-                    bs, requires_grad=False
-                ).to(boxes.device).repeat_interleave(num_of_boxes).reshape(-1, 1),
-                boxes.flatten(0, 1),
-            ], dim=1)
-            rois = roi_align(
-                x,
-                boxes=boxes, output_size=3,
-                spatial_scale=1.0 / 8, aligned=True
-            )
-            # new
-            num_of_boxes = rand_boxes.shape[1]
-            bs, _, h, w = x.shape
-            rand_boxes = torch.cat([
-                torch.arange(
-                    bs, requires_grad=False
-                ).to(rand_boxes.device).repeat_interleave(num_of_boxes).reshape(-1, 1),
-                rand_boxes.flatten(0, 1),
-            ], dim=1)
-            rand_rois = roi_align(
-                x,
-                boxes=rand_boxes, output_size=3,
-                spatial_scale=1.0 / 8, aligned=True
-            )
-            rois = torch.mean(rois, dim=0, keepdim=True)
-            # new
-            cos = torch.nn.CosineSimilarity(dim=1)
-            dist = cos(rois.view(1, -1), rand_rois.view(num_of_boxes, -1)) # [n_box]
-            _, topk = torch.sort(-dist)
-            x_topk = rand_rois[topk[:3], ...]
-            x_topk = torch.mean(x_topk, dim=0, keepdim=True)
-            rois += x_topk
-            x = self.conv1(rois)
-            x = x.view(x.size(0), -1)
-            x = self.fc(x)
-            # new
-            # x = self.fc1(x)
-            # x = self.fc2(x)
-            return x
-    def initialize_weights(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
-                nn.init.xavier_normal_(m.weight)
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
 class regressor1(nn.Module):
@@ -723,6 +381,8 @@ class regressor_with_SD_features_seg_vit_c3(nn.Module):
         out = self.vit_model.eval(img.squeeze().cpu().numpy(), feat=x.squeeze().cpu().numpy())[0]
         out = torch.from_numpy(out).unsqueeze(0).to(x.device)
         return out
@@ -763,6 +423,8 @@ class regressor_with_SD_features_tra(nn.Module):
         feat = x
         out = self.vit_model.eval(img.squeeze().cpu().numpy(), feat=x.squeeze().cpu().numpy())[0]
         out = torch.from_numpy(out).unsqueeze(0).to(x.device)
         return out, 0., feat

 import os
 import clip
 import sys
+import numpy as np
 from models.seg_post_model.cellpose.models import CellposeModel
 from torchvision.ops import roi_align
         self.adapter = adapter_roi_loca()
         self.regressor = regressor_with_SD_features_tra()
 class adapter_roi(nn.Module):
     def __init__(self, pool_size=[3, 3]):
                     nn.init.constant_(m.bias, 0)
 class regressor1(nn.Module):
         out = self.vit_model.eval(img.squeeze().cpu().numpy(), feat=x.squeeze().cpu().numpy())[0]
+        if out.dtype == np.uint16:
+            out = out.astype(np.int16)
         out = torch.from_numpy(out).unsqueeze(0).to(x.device)
         return out
         feat = x
         out = self.vit_model.eval(img.squeeze().cpu().numpy(), feat=x.squeeze().cpu().numpy())[0]
+        if out.dtype == np.uint16:
+            out = out.astype(np.int16)
         out = torch.from_numpy(out).unsqueeze(0).to(x.device)
         return out, 0., feat