HirraA commited on
Commit
168ec29
·
verified ·
1 Parent(s): cf4d9b9

Upload 30 files

Browse files
NWRD_dataset.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch.utils.data import Dataset
2
+ from PIL import Image
3
+ import os
4
+ from torchvision import transforms
5
+
6
class NWRD(Dataset):
    """Binary rust-classification dataset.

    Expects ``root_dir`` to contain two sub-directories:
    ``rust`` (label 1) and ``non_rust`` (label 0).
    """

    def __init__(self, root_dir, transform=None, train=True):
        # NOTE(review): `train` is accepted for API compatibility but is not
        # used to split the data -- confirm whether a train/val split was
        # intended. It is stored so callers can at least inspect it.
        self.root_dir = root_dir
        self.transform = transform
        self.train = train
        self.images = []
        self.labels = []
        self.load_data()

    def load_data(self):
        """Collect (image path, label) pairs from the rust/non_rust folders."""
        rust_dir = os.path.join(self.root_dir, "rust")
        non_rust_dir = os.path.join(self.root_dir, "non_rust")

        # Sort directory listings so sample order is deterministic across
        # filesystems and runs (os.listdir order is unspecified).
        for filename in sorted(os.listdir(rust_dir)):
            self.images.append(os.path.join(rust_dir, filename))
            self.labels.append(1)

        for filename in sorted(os.listdir(non_rust_dir)):
            self.images.append(os.path.join(non_rust_dir, filename))
            self.labels.append(0)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image_path = self.images[idx]
        image = Image.open(image_path).convert('RGB')

        label = int(self.labels[idx])
        if self.transform:
            image = self.transform(image)
        return image, label
README.md CHANGED
@@ -1,3 +1,52 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # DCFM
2
+ The official repo of the paper `Democracy Does Matter: Comprehensive Feature Mining for Co-Salient Object Detection`.
3
+
4
+ ## Environment Requirement
5
+ create an environment and install the dependencies as follows:
6
+ `pip install -r requirements.txt`
7
+
8
+ ## Data Format
9
+ trainset: CoCo-SEG
10
+
11
+ testset: CoCA, CoSOD3k, Cosal2015
12
+
13
+ Put the [CoCo-SEG](https://drive.google.com/file/d/1GbA_WKvJm04Z1tR8pTSzBdYVQ75avg4f/view), [CoCA](http://zhaozhang.net/coca.html), [CoSOD3k](http://dpfan.net/CoSOD3K/) and [Cosal2015](https://drive.google.com/u/0/uc?id=1mmYpGx17t8WocdPcw2WKeuFpz6VHoZ6K&export=download) datasets to `DCFM/data` as the following structure:
14
+ ```
15
+ DCFM
16
+ ├── other codes
17
+ ├── ...
18
+
19
+ └── data
20
+
21
+ ├── CoCo-SEG (CoCo-SEG's image files)
22
+ ├── CoCA (CoCA's image files)
23
+ ├── CoSOD3k (CoSOD3k's image files)
24
+ └── Cosal2015 (Cosal2015's image files)
25
+ ```
26
+
27
+ ## Trained model
28
+
29
+ trained model can be downloaded from [papermodel](https://drive.google.com/file/d/1cfuq4eJoCwvFR9W1XOJX7Y0ttd8TGjlp/view?usp=sharing).
30
+
31
+ Run `test.py` for inference.
32
+
33
+ The evaluation tool please follow: https://github.com/zzhanghub/eval-co-sod
34
+
35
+
36
+ <!-- USAGE EXAMPLES -->
37
+ ## Usage
38
+ Download the pretrained backbone model [VGG](https://drive.google.com/file/d/1Z1aAYXMyJ6txQ1Z9N7gtxLOIai4dxrXd/view?usp=sharing).
39
+
40
+ Run `train.py` for training.
41
+
42
+ ## Prediction results
43
+ The co-saliency maps of DCFM can be found at [preds](https://drive.google.com/file/d/1wGeNHXFWVSyqvmL4NIUmEFdlHDovEtQR/view?usp=sharing).
44
+
45
+ ## Reproduction
46
+ reproductions by myself on 2080Ti can be found at [reproduction1](https://drive.google.com/file/d/1vovii0RtYR_EC0Y2zxjY_cTWKWM3WaxP/view?usp=sharing) and [reproduction2](https://drive.google.com/file/d/1YPOKZ5kBtmZrCDhHpP3-w1GMVR5BfDoU/view?usp=sharing).
47
+
48
+ reproduction by myself on TITAN X can be found at [reproduction3](https://drive.google.com/file/d/1bnGFtRTYkVXqI2dcjeWFRDXnqqbUUBJr/view?usp=sharing).
49
+
50
+ ## Others
51
+ The code is based on [GCoNet](https://github.com/fanq15/GCoNet).
52
+ I've added a validation part to help select the model for closer results. This validation part is based on [GCoNet_plus](https://github.com/ZhengPeng7/GCoNet_plus). You can try different evaluation metrics to select the model.
config.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+
4
class Config():
    """Static configuration container for training/validation settings."""

    def __init__(self) -> None:
        # Published performance of GCoNet, kept as a baseline reference
        # when selecting checkpoints during validation.
        emax = {'CoCA': 0.783, 'CoSOD3k': 0.874, 'CoSal2015': 0.892}
        smeasure = {'CoCA': 0.710, 'CoSOD3k': 0.810, 'CoSal2015': 0.838}
        fmax = {'CoCA': 0.598, 'CoSOD3k': 0.805, 'CoSal2015': 0.856}
        self.val_measures = {
            'Emax': emax,
            'Smeasure': smeasure,
            'Fmax': fmax,
        }

        # Toggle the validation pass during training.
        self.validation = True
dataloader.cpython-37.pyc ADDED
Binary file (1.82 kB). View file
 
dataloader.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch.utils import data
2
+ import os
3
+ from PIL import Image, ImageFile
4
+
5
+
6
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
7
+
8
+
9
class EvalDataset(data.Dataset):
    """Pairs predicted saliency maps with ground-truth masks for evaluation.

    Only files present under *both* roots (same class sub-directory and
    same file name) are kept.  Ground-truth masks are pre-loaded as
    grayscale PIL images.
    """

    def __init__(self, pred_root, label_root, return_predpath=False, return_gtpath=False):
        self.return_predpath = return_predpath
        self.return_gtpath = return_gtpath

        # Keep only class-dir/file-name combinations that exist on both sides.
        shared = []
        label_dirs = os.listdir(label_root)
        for class_dir in os.listdir(pred_root):
            if class_dir not in label_dirs:
                continue
            gt_names = os.listdir(os.path.join(label_root, class_dir))
            for name in os.listdir(os.path.join(pred_root, class_dir)):
                if name in gt_names:
                    shared.append(os.path.join(class_dir, name))

        self.image_path = [os.path.join(pred_root, rel) for rel in shared]
        self.label_path = [os.path.join(label_root, rel) for rel in shared]

        # NOTE(review): every GT mask is held in memory for the lifetime of
        # the dataset -- acceptable for evaluation-sized sets, but worth
        # confirming for very large benchmarks.
        self.labels = [Image.open(p).convert('L') for p in self.label_path]

    def __getitem__(self, item):
        predpath = self.image_path[item]
        gtpath = self.label_path[item]
        pred = Image.open(predpath).convert('L')
        gt = self.labels[item]
        # Bring the prediction to the GT resolution when they disagree.
        if pred.size != gt.size:
            pred = pred.resize(gt.size, Image.BILINEAR)
        returns = [pred, gt]
        if self.return_predpath:
            returns.append(predpath)
        if self.return_gtpath:
            returns.append(gtpath)
        return returns

    def __len__(self):
        return len(self.image_path)
dataset.cpython-38.pyc ADDED
Binary file (8.83 kB). View file
 
dataset.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from PIL import Image, ImageOps, ImageFilter#, PILLOW_VERSION
3
+ import torch
4
+ import random
5
+ import numpy as np
6
+ from torch.utils import data
7
+ from torchvision import transforms
8
+ from torchvision.transforms import functional as F
9
+ import numbers
10
+ import random
11
+ import pandas as pd
12
+
13
+
14
class CoData(data.Dataset):
    """Group-wise co-saliency dataset: one item == one class folder.

    Each item returns every image of a class (or a random subset of at
    most ``max_num`` during training) stacked into a single tensor,
    together with the matching GT masks, their relative sub-paths and
    original sizes.
    """

    def __init__(self, img_root, gt_root, img_size, transform, max_num, is_train):
        class_list = os.listdir(img_root)
        self.size = [img_size, img_size]
        self.img_dirs = [os.path.join(img_root, c) for c in class_list]
        self.gt_dirs = [os.path.join(gt_root, c) for c in class_list]
        self.transform = transform
        self.max_num = max_num
        self.is_train = is_train

    def __getitem__(self, item):
        names = os.listdir(self.img_dirs[item])
        num = len(names)
        img_paths = [os.path.join(self.img_dirs[item], n) for n in names]
        # GT masks share the image's stem but are stored as .png.
        # (splitext replaces the original brittle x[:-4], which assumed a
        # three-character extension.)
        gt_paths = [os.path.join(self.gt_dirs[item], os.path.splitext(n)[0] + '.png')
                    for n in names]

        if self.is_train:
            # Randomly subsample the group to at most `max_num` images.
            final_num = min(num, self.max_num)
            sampled = random.sample(range(num), final_num)
            img_paths = [img_paths[i] for i in sampled]
            gt_paths = [gt_paths[i] for i in sampled]
        else:
            final_num = num

        imgs = torch.Tensor(final_num, 3, self.size[0], self.size[1])
        gts = torch.Tensor(final_num, 1, self.size[0], self.size[1])

        subpaths = []
        ori_sizes = []
        for idx in range(final_num):
            img = Image.open(img_paths[idx]).convert('RGB')
            gt = Image.open(gt_paths[idx]).convert('L')

            # Record "<class>/<stem>.png"; os.path is used instead of the
            # original split('/') so this also works on Windows paths.
            class_name = os.path.basename(os.path.dirname(img_paths[idx]))
            stem = os.path.splitext(os.path.basename(img_paths[idx]))[0]
            subpaths.append(os.path.join(class_name, stem + '.png'))
            # PIL size is (w, h); store as (h, w).
            ori_sizes.append((img.size[1], img.size[0]))

            [img, gt] = self.transform(img, gt)

            imgs[idx] = img
            gts[idx] = gt

        if self.is_train:
            # Class index repeated per sampled image, used as a group label.
            cls_ls = [item] * int(final_num)
            return imgs, gts, subpaths, ori_sizes, cls_ls
        return imgs, gts, subpaths, ori_sizes

    def __len__(self):
        return len(self.img_dirs)
75
+
76
+
77
class FixedResize(object):
    """Resize an (image, mask) pair to a fixed square size.

    The image uses bilinear interpolation; the mask uses nearest-neighbour
    so that label values are never blended.
    """

    def __init__(self, size):
        # Stored as (h, w); both dimensions equal.
        self.size = (size, size)

    def __call__(self, img, gt):
        resized_img = img.resize(self.size, Image.BILINEAR)
        resized_gt = gt.resize(self.size, Image.NEAREST)
        return resized_img, resized_gt
89
+
90
+
91
class ToTensor(object):
    """Convert an (image, mask) PIL pair to float tensors in [0, 1]."""

    def __call__(self, img, gt):
        tensor_pair = (F.to_tensor(img), F.to_tensor(gt))
        return tensor_pair
95
+
96
+
97
class Normalize(object):
    """Normalize the image tensor channel-wise; the mask passes through.

    Args:
        mean (tuple): per-channel means.
        std (tuple): per-channel standard deviations.
    """

    def __init__(self, mean=(0., 0., 0.), std=(1., 1., 1.)):
        self.mean = mean
        self.std = std

    def __call__(self, img, gt):
        # Only the image is normalised; GT masks keep their raw values.
        normalized_img = F.normalize(img, self.mean, self.std)
        return normalized_img, gt
112
+
113
+
114
class RandomHorizontalFlip(object):
    """Flip image and mask together, left-right, with probability ``p``."""

    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, img, gt):
        # A single random draw decides for both inputs so the pair always
        # stays spatially aligned.
        if random.random() < self.p:
            return (img.transpose(Image.FLIP_LEFT_RIGHT),
                    gt.transpose(Image.FLIP_LEFT_RIGHT))
        return img, gt
124
+
125
+
126
class RandomScaleCrop(object):
    """Random scale jitter followed by a random square crop.

    The short edge is jittered to [0.8, 1.2] * ``base_size``, then a
    ``crop_size`` x ``crop_size`` window is cropped at a random position,
    applying identical geometry to image and mask.

    Args:
        base_size: nominal short-edge length before jitter.
        crop_size: side length of the square crop.
        fill: padding value for the *mask* when the scaled image is
            smaller than the crop (the image is always padded with 0).
    """

    def __init__(self, base_size, crop_size, fill=0):
        self.base_size = base_size
        self.crop_size = crop_size
        self.fill = fill

    def __call__(self, img, mask):
        # Jitter the short edge within [0.8, 1.2] * base_size.
        short_size = random.randint(int(self.base_size * 0.8), int(self.base_size * 1.2))
        w, h = img.size
        if h > w:
            ow = short_size
            oh = int(1.0 * h * ow / w)
        else:
            oh = short_size
            ow = int(1.0 * w * oh / h)
        img = img.resize((ow, oh), Image.BILINEAR)
        mask = mask.resize((ow, oh), Image.NEAREST)

        # Pad on the right/bottom if the scaled result is too small to crop.
        if short_size < self.crop_size:
            padh = max(self.crop_size - oh, 0)
            padw = max(self.crop_size - ow, 0)
            img = ImageOps.expand(img, border=(0, 0, padw, padh), fill=0)
            mask = ImageOps.expand(mask, border=(0, 0, padw, padh), fill=self.fill)

        # Random top-left corner for the square crop.
        w, h = img.size
        x1 = random.randint(0, w - self.crop_size)
        y1 = random.randint(0, h - self.crop_size)
        box = (x1, y1, x1 + self.crop_size, y1 + self.crop_size)
        return img.crop(box), mask.crop(box)
160
+
161
+
162
class RandomRotation(object):
    """Rotate image and mask by one shared random angle.

    Args:
        degrees: a single non-negative number means (-degrees, +degrees);
            otherwise a 2-sequence giving the (min, max) range.
        resample: stored but NOT forwarded -- __call__ hard-codes BILINEAR
            for the image and NEAREST for the mask.
            # NOTE(review): confirm whether `resample` was meant to be used.
        expand: forwarded to ``F.rotate``.
        center: forwarded to ``F.rotate``.
    """

    def __init__(self, degrees, resample=False, expand=False, center=None):
        if isinstance(degrees, numbers.Number):
            if degrees < 0:
                raise ValueError("If degrees is a single number, it must be positive.")
            self.degrees = (-degrees, degrees)
        else:
            if len(degrees) != 2:
                raise ValueError("If degrees is a sequence, it must be of len 2.")
            self.degrees = degrees

        self.resample = resample
        self.expand = expand
        self.center = center

    @staticmethod
    def get_params(degrees):
        """Draw a rotation angle uniformly from the ``degrees`` range."""
        return random.uniform(degrees[0], degrees[1])

    def __call__(self, img, gt):
        """Rotate both inputs by the same random angle.

        img (PIL Image): image to be rotated.
        gt (PIL Image): mask to be rotated identically.

        Returns:
            (PIL Image, PIL Image): the rotated pair.
        """
        angle = self.get_params(self.degrees)
        rotated_img = F.rotate(img, angle, Image.BILINEAR, self.expand, self.center)
        rotated_gt = F.rotate(gt, angle, Image.NEAREST, self.expand, self.center)
        return rotated_img, rotated_gt
194
+
195
+
196
+
197
class Compose(object):
    """Chain paired (img, gt) transforms, applying each in order."""

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, gt):
        # Each transform receives and returns the (img, gt) pair, so the
        # same geometric change is applied to both.
        for transform in self.transforms:
            img, gt = transform(img, gt)
        return img, gt

    def __repr__(self):
        parts = [self.__class__.__name__ + '(']
        for transform in self.transforms:
            parts.append('\n')
            parts.append('    {0}'.format(transform))
        parts.append('\n)')
        return ''.join(parts)
213
+
214
+
215
# get the dataloader (Note: without data augmentation)
def get_loader(img_root, gt_root, img_size, batch_size, max_num = float('inf'), istrain=True, shuffle=False, num_workers=0, pin=False):
    """Build a DataLoader over ``CoData``.

    Training mode applies scale/crop, flip and rotation augmentation;
    evaluation mode only resizes.  Both normalise with ImageNet stats.
    """
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]
    if istrain:
        transform = Compose([
            RandomScaleCrop(img_size * 2, img_size * 2),
            FixedResize(img_size),
            RandomHorizontalFlip(),
            RandomRotation((-90, 90)),
            ToTensor(),
            Normalize(mean=imagenet_mean, std=imagenet_std),
        ])
    else:
        transform = Compose([
            FixedResize(img_size),
            ToTensor(),
            Normalize(mean=imagenet_mean, std=imagenet_std),
        ])

    dataset = CoData(img_root, gt_root, img_size, transform, max_num, is_train=istrain)
    return data.DataLoader(dataset=dataset, batch_size=batch_size, shuffle=shuffle,
                           num_workers=num_workers, pin_memory=pin)
239
+
240
+
241
if __name__ == '__main__':
    # Visual sanity check for the data pipeline: iterate a small loader and
    # display each de-normalised image next to its mask.
    import matplotlib.pyplot as plt

    # ImageNet statistics used by get_loader's Normalize step; needed here
    # to invert the normalisation for display.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    img_root = './data/testtrain/img/'
    gt_root = './data/testtrain/gt/'
    loader = get_loader(img_root, gt_root, 20, 1, 16, istrain=False)
    for batch in loader:
        # batch[0] is (1, group, 3, H, W); take the per-group dimensions.
        b, c, h, w = batch[0][0].shape
        for i in range(b):
            # Undo normalisation (x * std + mean) and rescale to [0, 255].
            img = batch[0].squeeze(0)[i].permute(1, 2, 0).cpu().numpy() * std + mean
            image = img * 255
            mask = batch[1].squeeze(0)[i].squeeze().cpu().numpy()
            plt.subplot(121)
            plt.imshow(np.uint8(image))
            plt.subplot(122)
            plt.imshow(mask)
            plt.show(block=True)
dataset_preprocessing.ipynb ADDED
@@ -0,0 +1,1046 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/home/wej36how/.conda/envs/dmt/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "import glob\n",
19
+ "import os\n",
20
+ "import cv2\n",
21
+ "import numpy as np\n",
22
+ "from PIL import Image\n",
23
+ "import torch \n",
24
+ "from PIL import Image\n",
25
+ "from pathlib import Path\n",
26
+ "import torchvision.transforms as T\n",
27
+ "import torchvision.transforms.functional as TF\n",
28
+ "import numpy as np\n",
29
+ "from torchvision import transforms\n",
30
+ "import os\n",
31
+ "import cv2\n",
32
+ "import matplotlib.pyplot as plt\n",
33
+ "import shutil"
34
+ ]
35
+ },
36
+ {
37
+ "cell_type": "code",
38
+ "execution_count": 2,
39
+ "metadata": {},
40
+ "outputs": [],
41
+ "source": [
42
+ "source = \"/scratch/wej36how/Datasets/NWRD/val\"\n",
43
+ "dest = \"/scratch/wej36how/Datasets/NWRDProcessed/val\"\n",
44
+ "patch_size = 224\n",
45
+ "rust_threshold = 150\n",
46
+ "max_number_of_images_per_group = 12"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "markdown",
51
+ "metadata": {},
52
+ "source": [
53
+ "This snippet will make patches of the images in the destination/patches directory."
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": null,
59
+ "metadata": {},
60
+ "outputs": [],
61
+ "source": [
62
+ "patches_path = os.path.join(dest, \"patches\")\n",
63
+ "images_dir = os.path.join(patches_path, \"images\")\n",
64
+ "masks_dir = os.path.join(patches_path, \"masks\")"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 3,
70
+ "metadata": {},
71
+ "outputs": [
72
+ {
73
+ "name": "stdout",
74
+ "output_type": "stream",
75
+ "text": [
76
+ "/scratch/wej36how/Datasets/NWRD/train/images/10.jpg\n",
77
+ "image shape: (4000, 6016, 3)\n",
78
+ "total patches: 442\n",
79
+ "/scratch/wej36how/Datasets/NWRD/train/images/100.jpg\n",
80
+ "image shape: (4608, 3456, 3)\n",
81
+ "total patches: 300\n",
82
+ "/scratch/wej36how/Datasets/NWRD/train/images/101.jpg\n",
83
+ "image shape: (3456, 3612, 3)\n",
84
+ "total patches: 240\n",
85
+ "/scratch/wej36how/Datasets/NWRD/train/images/102.jpg\n",
86
+ "image shape: (2984, 4248, 3)\n",
87
+ "total patches: 234\n",
88
+ "/scratch/wej36how/Datasets/NWRD/train/images/103.jpg\n",
89
+ "image shape: (3584, 3456, 3)\n",
90
+ "total patches: 226\n",
91
+ "/scratch/wej36how/Datasets/NWRD/train/images/104.jpg\n",
92
+ "image shape: (3456, 4608, 3)\n",
93
+ "total patches: 300\n",
94
+ "/scratch/wej36how/Datasets/NWRD/train/images/109.jpg\n",
95
+ "image shape: (4608, 3456, 3)\n",
96
+ "total patches: 300\n",
97
+ "/scratch/wej36how/Datasets/NWRD/train/images/11.jpg\n",
98
+ "image shape: (4000, 6016, 3)\n",
99
+ "total patches: 442\n",
100
+ "/scratch/wej36how/Datasets/NWRD/train/images/110.jpg\n",
101
+ "image shape: (4600, 2536, 3)\n",
102
+ "total patches: 220\n",
103
+ "/scratch/wej36how/Datasets/NWRD/train/images/111.jpg\n",
104
+ "image shape: (2909, 4608, 3)\n",
105
+ "total patches: 240\n",
106
+ "/scratch/wej36how/Datasets/NWRD/train/images/113.jpg\n",
107
+ "image shape: (4608, 3456, 3)\n",
108
+ "total patches: 300\n",
109
+ "/scratch/wej36how/Datasets/NWRD/train/images/114.jpg\n",
110
+ "image shape: (3456, 4608, 3)\n",
111
+ "total patches: 300\n",
112
+ "/scratch/wej36how/Datasets/NWRD/train/images/117.jpg\n",
113
+ "image shape: (3456, 4608, 3)\n",
114
+ "total patches: 300\n",
115
+ "/scratch/wej36how/Datasets/NWRD/train/images/118.jpg\n",
116
+ "image shape: (3456, 4608, 3)\n",
117
+ "total patches: 300\n",
118
+ "/scratch/wej36how/Datasets/NWRD/train/images/119.jpg\n",
119
+ "image shape: (3456, 4608, 3)\n",
120
+ "total patches: 300\n",
121
+ "/scratch/wej36how/Datasets/NWRD/train/images/12.jpg\n",
122
+ "image shape: (4000, 6016, 3)\n",
123
+ "total patches: 442\n",
124
+ "/scratch/wej36how/Datasets/NWRD/train/images/121.jpg\n",
125
+ "image shape: (3456, 4608, 3)\n",
126
+ "total patches: 300\n",
127
+ "/scratch/wej36how/Datasets/NWRD/train/images/122.jpg\n",
128
+ "image shape: (3456, 4608, 3)\n",
129
+ "total patches: 300\n",
130
+ "/scratch/wej36how/Datasets/NWRD/train/images/124.jpg\n",
131
+ "image shape: (4608, 3456, 3)\n",
132
+ "total patches: 300\n",
133
+ "/scratch/wej36how/Datasets/NWRD/train/images/125.jpg\n",
134
+ "image shape: (4608, 3456, 3)\n",
135
+ "total patches: 300\n",
136
+ "/scratch/wej36how/Datasets/NWRD/train/images/128.jpg\n",
137
+ "image shape: (3456, 4608, 3)\n",
138
+ "total patches: 300\n",
139
+ "/scratch/wej36how/Datasets/NWRD/train/images/129.jpg\n",
140
+ "image shape: (3456, 4608, 3)\n",
141
+ "total patches: 300\n",
142
+ "/scratch/wej36how/Datasets/NWRD/train/images/13.jpg\n",
143
+ "image shape: (4000, 6016, 3)\n",
144
+ "total patches: 442\n",
145
+ "/scratch/wej36how/Datasets/NWRD/train/images/130.jpg\n",
146
+ "image shape: (3456, 4608, 3)\n",
147
+ "total patches: 300\n",
148
+ "/scratch/wej36how/Datasets/NWRD/train/images/131.jpg\n",
149
+ "image shape: (3456, 4608, 3)\n",
150
+ "total patches: 300\n",
151
+ "/scratch/wej36how/Datasets/NWRD/train/images/132.jpg\n",
152
+ "image shape: (4608, 3456, 3)\n",
153
+ "total patches: 300\n",
154
+ "/scratch/wej36how/Datasets/NWRD/train/images/133.jpg\n",
155
+ "image shape: (4608, 3456, 3)\n",
156
+ "total patches: 300\n",
157
+ "/scratch/wej36how/Datasets/NWRD/train/images/134.jpg\n",
158
+ "image shape: (4608, 3456, 3)\n",
159
+ "total patches: 300\n",
160
+ "/scratch/wej36how/Datasets/NWRD/train/images/135.jpg\n",
161
+ "image shape: (4608, 3456, 3)\n",
162
+ "total patches: 300\n",
163
+ "/scratch/wej36how/Datasets/NWRD/train/images/136.jpg\n",
164
+ "image shape: (3456, 4608, 3)\n",
165
+ "total patches: 300\n",
166
+ "/scratch/wej36how/Datasets/NWRD/train/images/137.jpg\n",
167
+ "image shape: (4608, 3456, 3)\n",
168
+ "total patches: 300\n",
169
+ "/scratch/wej36how/Datasets/NWRD/train/images/138.jpg\n",
170
+ "image shape: (4608, 3456, 3)\n",
171
+ "total patches: 300\n",
172
+ "/scratch/wej36how/Datasets/NWRD/train/images/139.jpg\n",
173
+ "image shape: (3456, 4608, 3)\n",
174
+ "total patches: 300\n",
175
+ "/scratch/wej36how/Datasets/NWRD/train/images/14.jpg\n",
176
+ "image shape: (4000, 6016, 3)\n",
177
+ "total patches: 442\n",
178
+ "/scratch/wej36how/Datasets/NWRD/train/images/140.jpg\n",
179
+ "image shape: (3456, 4608, 3)\n",
180
+ "total patches: 300\n",
181
+ "/scratch/wej36how/Datasets/NWRD/train/images/141.jpg\n",
182
+ "image shape: (4608, 3456, 3)\n",
183
+ "total patches: 300\n",
184
+ "/scratch/wej36how/Datasets/NWRD/train/images/142.jpg\n",
185
+ "image shape: (4608, 3456, 3)\n",
186
+ "total patches: 300\n",
187
+ "/scratch/wej36how/Datasets/NWRD/train/images/144.jpg\n",
188
+ "image shape: (4608, 3456, 3)\n",
189
+ "total patches: 300\n",
190
+ "/scratch/wej36how/Datasets/NWRD/train/images/145.jpg\n",
191
+ "image shape: (4608, 3456, 3)\n",
192
+ "total patches: 300\n",
193
+ "/scratch/wej36how/Datasets/NWRD/train/images/146.jpg\n",
194
+ "image shape: (4608, 3456, 3)\n",
195
+ "total patches: 300\n",
196
+ "/scratch/wej36how/Datasets/NWRD/train/images/149.jpg\n",
197
+ "image shape: (3968, 3424, 3)\n",
198
+ "total patches: 255\n",
199
+ "/scratch/wej36how/Datasets/NWRD/train/images/15.jpg\n",
200
+ "image shape: (4000, 6016, 3)\n",
201
+ "total patches: 442\n",
202
+ "/scratch/wej36how/Datasets/NWRD/train/images/150.jpg\n",
203
+ "image shape: (3456, 4608, 3)\n",
204
+ "total patches: 300\n",
205
+ "/scratch/wej36how/Datasets/NWRD/train/images/19.jpg\n",
206
+ "image shape: (4000, 6016, 3)\n",
207
+ "total patches: 442\n",
208
+ "/scratch/wej36how/Datasets/NWRD/train/images/2.jpg\n",
209
+ "image shape: (4000, 6016, 3)\n",
210
+ "total patches: 442\n",
211
+ "/scratch/wej36how/Datasets/NWRD/train/images/21.jpg\n",
212
+ "image shape: (4000, 6016, 3)\n",
213
+ "total patches: 442\n",
214
+ "/scratch/wej36how/Datasets/NWRD/train/images/24.jpg\n",
215
+ "image shape: (4000, 6016, 3)\n",
216
+ "total patches: 442\n",
217
+ "/scratch/wej36how/Datasets/NWRD/train/images/26.jpg\n",
218
+ "image shape: (4000, 6016, 3)\n",
219
+ "total patches: 442\n",
220
+ "/scratch/wej36how/Datasets/NWRD/train/images/27.jpg\n",
221
+ "image shape: (4000, 6016, 3)\n",
222
+ "total patches: 442\n",
223
+ "/scratch/wej36how/Datasets/NWRD/train/images/28.jpg\n",
224
+ "image shape: (4000, 6016, 3)\n",
225
+ "total patches: 442\n",
226
+ "/scratch/wej36how/Datasets/NWRD/train/images/3.jpg\n",
227
+ "image shape: (4000, 6016, 3)\n",
228
+ "total patches: 442\n",
229
+ "/scratch/wej36how/Datasets/NWRD/train/images/30.jpg\n",
230
+ "image shape: (4000, 6016, 3)\n",
231
+ "total patches: 442\n",
232
+ "/scratch/wej36how/Datasets/NWRD/train/images/31.jpg\n",
233
+ "image shape: (4000, 6016, 3)\n",
234
+ "total patches: 442\n",
235
+ "/scratch/wej36how/Datasets/NWRD/train/images/32.jpg\n",
236
+ "image shape: (4000, 6016, 3)\n",
237
+ "total patches: 442\n",
238
+ "/scratch/wej36how/Datasets/NWRD/train/images/33.jpg\n",
239
+ "image shape: (4000, 6016, 3)\n",
240
+ "total patches: 442\n",
241
+ "/scratch/wej36how/Datasets/NWRD/train/images/5.jpg\n",
242
+ "image shape: (4000, 6016, 3)\n",
243
+ "total patches: 442\n",
244
+ "/scratch/wej36how/Datasets/NWRD/train/images/57.jpg\n",
245
+ "image shape: (4000, 6016, 3)\n",
246
+ "total patches: 442\n",
247
+ "/scratch/wej36how/Datasets/NWRD/train/images/58.jpg\n",
248
+ "image shape: (4000, 6016, 3)\n",
249
+ "total patches: 442\n",
250
+ "/scratch/wej36how/Datasets/NWRD/train/images/60.jpg\n",
251
+ "image shape: (4000, 6016, 3)\n",
252
+ "total patches: 442\n",
253
+ "/scratch/wej36how/Datasets/NWRD/train/images/64.jpg\n",
254
+ "image shape: (4000, 6016, 3)\n",
255
+ "total patches: 442\n",
256
+ "/scratch/wej36how/Datasets/NWRD/train/images/69.jpg\n",
257
+ "image shape: (4608, 3456, 3)\n",
258
+ "total patches: 300\n",
259
+ "/scratch/wej36how/Datasets/NWRD/train/images/71.jpg\n",
260
+ "image shape: (3456, 4608, 3)\n",
261
+ "total patches: 300\n",
262
+ "/scratch/wej36how/Datasets/NWRD/train/images/72.jpg\n",
263
+ "image shape: (3456, 4608, 3)\n",
264
+ "total patches: 300\n",
265
+ "/scratch/wej36how/Datasets/NWRD/train/images/73.jpg\n",
266
+ "image shape: (3456, 4608, 3)\n",
267
+ "total patches: 300\n",
268
+ "/scratch/wej36how/Datasets/NWRD/train/images/74.jpg\n",
269
+ "image shape: (3456, 4608, 3)\n",
270
+ "total patches: 300\n",
271
+ "/scratch/wej36how/Datasets/NWRD/train/images/75.jpg\n",
272
+ "image shape: (3456, 4608, 3)\n",
273
+ "total patches: 300\n",
274
+ "/scratch/wej36how/Datasets/NWRD/train/images/76.jpg\n",
275
+ "image shape: (3456, 4608, 3)\n",
276
+ "total patches: 300\n",
277
+ "/scratch/wej36how/Datasets/NWRD/train/images/78.jpg\n",
278
+ "image shape: (3456, 4608, 3)\n",
279
+ "total patches: 300\n",
280
+ "/scratch/wej36how/Datasets/NWRD/train/images/79.jpg\n",
281
+ "image shape: (3456, 4608, 3)\n",
282
+ "total patches: 300\n",
283
+ "/scratch/wej36how/Datasets/NWRD/train/images/81.jpg\n",
284
+ "image shape: (3456, 4608, 3)\n",
285
+ "total patches: 300\n",
286
+ "/scratch/wej36how/Datasets/NWRD/train/images/83.jpg\n",
287
+ "image shape: (3456, 4608, 3)\n",
288
+ "total patches: 300\n",
289
+ "/scratch/wej36how/Datasets/NWRD/train/images/85.jpg\n",
290
+ "image shape: (4608, 3456, 3)\n",
291
+ "total patches: 300\n",
292
+ "/scratch/wej36how/Datasets/NWRD/train/images/86.jpg\n",
293
+ "image shape: (4608, 3456, 3)\n",
294
+ "total patches: 300\n",
295
+ "/scratch/wej36how/Datasets/NWRD/train/images/87.jpg\n",
296
+ "image shape: (4608, 3456, 3)\n",
297
+ "total patches: 300\n",
298
+ "/scratch/wej36how/Datasets/NWRD/train/images/88.jpg\n",
299
+ "image shape: (4608, 3456, 3)\n",
300
+ "total patches: 300\n",
301
+ "/scratch/wej36how/Datasets/NWRD/train/images/9.jpg\n",
302
+ "image shape: (4000, 6016, 3)\n",
303
+ "total patches: 442\n",
304
+ "/scratch/wej36how/Datasets/NWRD/train/images/90.jpg\n",
305
+ "image shape: (4608, 3456, 3)\n",
306
+ "total patches: 300\n",
307
+ "/scratch/wej36how/Datasets/NWRD/train/images/91.jpg\n",
308
+ "image shape: (4608, 3456, 3)\n",
309
+ "total patches: 300\n",
310
+ "/scratch/wej36how/Datasets/NWRD/train/images/92.jpg\n",
311
+ "image shape: (4608, 3456, 3)\n",
312
+ "total patches: 300\n",
313
+ "/scratch/wej36how/Datasets/NWRD/train/images/93.jpg\n",
314
+ "image shape: (4608, 3456, 3)\n",
315
+ "total patches: 300\n",
316
+ "/scratch/wej36how/Datasets/NWRD/train/images/94.jpg\n",
317
+ "image shape: (4608, 3456, 3)\n",
318
+ "total patches: 300\n",
319
+ "/scratch/wej36how/Datasets/NWRD/train/images/95.jpg\n",
320
+ "image shape: (4608, 3456, 3)\n",
321
+ "total patches: 300\n",
322
+ "/scratch/wej36how/Datasets/NWRD/train/images/96.jpg\n",
323
+ "image shape: (4608, 3456, 3)\n",
324
+ "total patches: 300\n",
325
+ "/scratch/wej36how/Datasets/NWRD/train/images/98.jpg\n",
326
+ "image shape: (4608, 3456, 3)\n",
327
+ "total patches: 300\n",
328
+ "/scratch/wej36how/Datasets/NWRD/train/images/99.jpg\n",
329
+ "image shape: (4608, 3456, 3)\n",
330
+ "total patches: 300\n",
331
+ "total image count: 28523\n",
332
+ "/scratch/wej36how/Datasets/NWRD/train/masks/10.png\n",
333
+ "image shape: (4000, 6016, 3)\n",
334
+ "total patches: 442\n",
335
+ "/scratch/wej36how/Datasets/NWRD/train/masks/100.png\n",
336
+ "image shape: (4608, 3456, 3)\n",
337
+ "total patches: 300\n",
338
+ "/scratch/wej36how/Datasets/NWRD/train/masks/101.png\n",
339
+ "image shape: (3435, 3593, 3)\n",
340
+ "total patches: 240\n",
341
+ "/scratch/wej36how/Datasets/NWRD/train/masks/102.png\n",
342
+ "image shape: (2984, 4248, 3)\n",
343
+ "total patches: 234\n",
344
+ "/scratch/wej36how/Datasets/NWRD/train/masks/103.png\n",
345
+ "image shape: (3584, 3456, 3)\n",
346
+ "total patches: 226\n",
347
+ "/scratch/wej36how/Datasets/NWRD/train/masks/104.png\n",
348
+ "image shape: (3456, 4608, 3)\n",
349
+ "total patches: 300\n",
350
+ "/scratch/wej36how/Datasets/NWRD/train/masks/109.png\n",
351
+ "image shape: (4608, 3456, 3)\n",
352
+ "total patches: 300\n",
353
+ "/scratch/wej36how/Datasets/NWRD/train/masks/11.png\n",
354
+ "image shape: (4000, 6016, 3)\n",
355
+ "total patches: 442\n",
356
+ "/scratch/wej36how/Datasets/NWRD/train/masks/110.png\n",
357
+ "image shape: (4600, 2536, 3)\n",
358
+ "total patches: 220\n",
359
+ "/scratch/wej36how/Datasets/NWRD/train/masks/111.png\n",
360
+ "image shape: (2909, 4608, 3)\n",
361
+ "total patches: 240\n",
362
+ "/scratch/wej36how/Datasets/NWRD/train/masks/113.png\n",
363
+ "image shape: (4608, 3456, 3)\n",
364
+ "total patches: 300\n",
365
+ "/scratch/wej36how/Datasets/NWRD/train/masks/114.png\n",
366
+ "image shape: (3456, 4608, 3)\n",
367
+ "total patches: 300\n",
368
+ "/scratch/wej36how/Datasets/NWRD/train/masks/117.png\n",
369
+ "image shape: (3456, 4608, 3)\n",
370
+ "total patches: 300\n",
371
+ "/scratch/wej36how/Datasets/NWRD/train/masks/118.png\n",
372
+ "image shape: (3456, 4608, 3)\n",
373
+ "total patches: 300\n",
374
+ "/scratch/wej36how/Datasets/NWRD/train/masks/119.png\n",
375
+ "image shape: (3456, 4608, 3)\n",
376
+ "total patches: 300\n",
377
+ "/scratch/wej36how/Datasets/NWRD/train/masks/12.png\n",
378
+ "image shape: (4000, 6016, 3)\n",
379
+ "total patches: 442\n",
380
+ "/scratch/wej36how/Datasets/NWRD/train/masks/121.png\n",
381
+ "image shape: (3456, 4608, 3)\n",
382
+ "total patches: 300\n",
383
+ "/scratch/wej36how/Datasets/NWRD/train/masks/122.png\n",
384
+ "image shape: (3456, 4608, 3)\n",
385
+ "total patches: 300\n",
386
+ "/scratch/wej36how/Datasets/NWRD/train/masks/124.png\n",
387
+ "image shape: (4608, 3456, 3)\n",
388
+ "total patches: 300\n",
389
+ "/scratch/wej36how/Datasets/NWRD/train/masks/125.png\n",
390
+ "image shape: (4608, 3456, 3)\n",
391
+ "total patches: 300\n",
392
+ "/scratch/wej36how/Datasets/NWRD/train/masks/128.png\n",
393
+ "image shape: (3456, 4608, 3)\n",
394
+ "total patches: 300\n",
395
+ "/scratch/wej36how/Datasets/NWRD/train/masks/129.png\n",
396
+ "image shape: (3456, 4608, 3)\n",
397
+ "total patches: 300\n",
398
+ "/scratch/wej36how/Datasets/NWRD/train/masks/13.png\n",
399
+ "image shape: (4000, 6016, 3)\n",
400
+ "total patches: 442\n",
401
+ "/scratch/wej36how/Datasets/NWRD/train/masks/130.png\n",
402
+ "image shape: (3456, 4608, 3)\n",
403
+ "total patches: 300\n",
404
+ "/scratch/wej36how/Datasets/NWRD/train/masks/131.png\n",
405
+ "image shape: (3456, 4608, 3)\n",
406
+ "total patches: 300\n",
407
+ "/scratch/wej36how/Datasets/NWRD/train/masks/132.png\n",
408
+ "image shape: (4608, 3456, 3)\n",
409
+ "total patches: 300\n",
410
+ "/scratch/wej36how/Datasets/NWRD/train/masks/133.png\n",
411
+ "image shape: (4608, 3456, 3)\n",
412
+ "total patches: 300\n",
413
+ "/scratch/wej36how/Datasets/NWRD/train/masks/134.png\n",
414
+ "image shape: (4608, 3456, 3)\n",
415
+ "total patches: 300\n",
416
+ "/scratch/wej36how/Datasets/NWRD/train/masks/135.png\n",
417
+ "image shape: (4608, 3456, 3)\n",
418
+ "total patches: 300\n",
419
+ "/scratch/wej36how/Datasets/NWRD/train/masks/136.png\n",
420
+ "image shape: (3456, 4608, 3)\n",
421
+ "total patches: 300\n",
422
+ "/scratch/wej36how/Datasets/NWRD/train/masks/137.png\n",
423
+ "image shape: (4608, 3456, 3)\n",
424
+ "total patches: 300\n",
425
+ "/scratch/wej36how/Datasets/NWRD/train/masks/138.png\n",
426
+ "image shape: (4608, 3456, 3)\n",
427
+ "total patches: 300\n",
428
+ "/scratch/wej36how/Datasets/NWRD/train/masks/139.png\n",
429
+ "image shape: (3456, 4608, 3)\n",
430
+ "total patches: 300\n",
431
+ "/scratch/wej36how/Datasets/NWRD/train/masks/14.png\n",
432
+ "image shape: (4000, 6016, 3)\n",
433
+ "total patches: 442\n",
434
+ "/scratch/wej36how/Datasets/NWRD/train/masks/140.png\n",
435
+ "image shape: (3456, 4608, 3)\n",
436
+ "total patches: 300\n",
437
+ "/scratch/wej36how/Datasets/NWRD/train/masks/141.png\n",
438
+ "image shape: (4608, 3456, 3)\n",
439
+ "total patches: 300\n",
440
+ "/scratch/wej36how/Datasets/NWRD/train/masks/142.png\n",
441
+ "image shape: (4608, 3456, 3)\n",
442
+ "total patches: 300\n",
443
+ "/scratch/wej36how/Datasets/NWRD/train/masks/144.png\n",
444
+ "image shape: (4608, 3456, 3)\n",
445
+ "total patches: 300\n",
446
+ "/scratch/wej36how/Datasets/NWRD/train/masks/145.png\n",
447
+ "image shape: (4608, 3456, 3)\n",
448
+ "total patches: 300\n",
449
+ "/scratch/wej36how/Datasets/NWRD/train/masks/146.png\n",
450
+ "image shape: (4608, 3456, 3)\n",
451
+ "total patches: 300\n",
452
+ "/scratch/wej36how/Datasets/NWRD/train/masks/149.png\n",
453
+ "image shape: (3968, 3424, 3)\n",
454
+ "total patches: 255\n",
455
+ "/scratch/wej36how/Datasets/NWRD/train/masks/15.png\n",
456
+ "image shape: (4000, 6016, 3)\n",
457
+ "total patches: 442\n",
458
+ "/scratch/wej36how/Datasets/NWRD/train/masks/150.png\n",
459
+ "image shape: (3456, 4608, 3)\n",
460
+ "total patches: 300\n",
461
+ "/scratch/wej36how/Datasets/NWRD/train/masks/19.png\n",
462
+ "image shape: (4000, 6016, 3)\n",
463
+ "total patches: 442\n",
464
+ "/scratch/wej36how/Datasets/NWRD/train/masks/2.png\n",
465
+ "image shape: (4000, 6016, 3)\n",
466
+ "total patches: 442\n",
467
+ "/scratch/wej36how/Datasets/NWRD/train/masks/21.png\n",
468
+ "image shape: (4000, 6016, 3)\n",
469
+ "total patches: 442\n",
470
+ "/scratch/wej36how/Datasets/NWRD/train/masks/24.png\n",
471
+ "image shape: (4000, 6016, 3)\n",
472
+ "total patches: 442\n",
473
+ "/scratch/wej36how/Datasets/NWRD/train/masks/26.png\n",
474
+ "image shape: (4000, 6016, 3)\n",
475
+ "total patches: 442\n",
476
+ "/scratch/wej36how/Datasets/NWRD/train/masks/27.png\n",
477
+ "image shape: (4000, 6016, 3)\n",
478
+ "total patches: 442\n",
479
+ "/scratch/wej36how/Datasets/NWRD/train/masks/28.png\n",
480
+ "image shape: (4000, 6016, 3)\n",
481
+ "total patches: 442\n",
482
+ "/scratch/wej36how/Datasets/NWRD/train/masks/3.png\n",
483
+ "image shape: (4000, 6016, 3)\n",
484
+ "total patches: 442\n",
485
+ "/scratch/wej36how/Datasets/NWRD/train/masks/30.png\n",
486
+ "image shape: (4000, 6016, 3)\n",
487
+ "total patches: 442\n",
488
+ "/scratch/wej36how/Datasets/NWRD/train/masks/31.png\n",
489
+ "image shape: (4000, 6016, 3)\n",
490
+ "total patches: 442\n",
491
+ "/scratch/wej36how/Datasets/NWRD/train/masks/32.png\n",
492
+ "image shape: (4000, 6016, 3)\n",
493
+ "total patches: 442\n",
494
+ "/scratch/wej36how/Datasets/NWRD/train/masks/33.png\n",
495
+ "image shape: (4000, 6016, 3)\n",
496
+ "total patches: 442\n",
497
+ "/scratch/wej36how/Datasets/NWRD/train/masks/5.png\n",
498
+ "image shape: (4000, 6016, 3)\n",
499
+ "total patches: 442\n",
500
+ "/scratch/wej36how/Datasets/NWRD/train/masks/57.png\n",
501
+ "image shape: (4000, 6016, 3)\n",
502
+ "total patches: 442\n",
503
+ "/scratch/wej36how/Datasets/NWRD/train/masks/58.png\n",
504
+ "image shape: (4000, 6016, 3)\n",
505
+ "total patches: 442\n",
506
+ "/scratch/wej36how/Datasets/NWRD/train/masks/60.png\n",
507
+ "image shape: (4000, 6016, 3)\n",
508
+ "total patches: 442\n",
509
+ "/scratch/wej36how/Datasets/NWRD/train/masks/64.png\n",
510
+ "image shape: (4000, 6016, 3)\n",
511
+ "total patches: 442\n",
512
+ "/scratch/wej36how/Datasets/NWRD/train/masks/69.png\n",
513
+ "image shape: (4608, 3456, 3)\n",
514
+ "total patches: 300\n",
515
+ "/scratch/wej36how/Datasets/NWRD/train/masks/71.png\n",
516
+ "image shape: (3456, 4608, 3)\n",
517
+ "total patches: 300\n",
518
+ "/scratch/wej36how/Datasets/NWRD/train/masks/72.png\n",
519
+ "image shape: (3456, 4608, 3)\n",
520
+ "total patches: 300\n",
521
+ "/scratch/wej36how/Datasets/NWRD/train/masks/73.png\n",
522
+ "image shape: (3456, 4608, 3)\n",
523
+ "total patches: 300\n",
524
+ "/scratch/wej36how/Datasets/NWRD/train/masks/74.png\n",
525
+ "image shape: (3456, 4608, 3)\n",
526
+ "total patches: 300\n",
527
+ "/scratch/wej36how/Datasets/NWRD/train/masks/75.png\n",
528
+ "image shape: (3456, 4608, 3)\n",
529
+ "total patches: 300\n",
530
+ "/scratch/wej36how/Datasets/NWRD/train/masks/76.png\n",
531
+ "image shape: (3456, 4608, 3)\n",
532
+ "total patches: 300\n",
533
+ "/scratch/wej36how/Datasets/NWRD/train/masks/78.png\n",
534
+ "image shape: (3456, 4608, 3)\n",
535
+ "total patches: 300\n",
536
+ "/scratch/wej36how/Datasets/NWRD/train/masks/79.png\n",
537
+ "image shape: (3456, 4608, 3)\n",
538
+ "total patches: 300\n",
539
+ "/scratch/wej36how/Datasets/NWRD/train/masks/81.png\n",
540
+ "image shape: (3456, 4608, 3)\n",
541
+ "total patches: 300\n",
542
+ "/scratch/wej36how/Datasets/NWRD/train/masks/83.png\n",
543
+ "image shape: (3456, 4608, 3)\n",
544
+ "total patches: 300\n",
545
+ "/scratch/wej36how/Datasets/NWRD/train/masks/85.png\n",
546
+ "image shape: (4608, 3456, 3)\n",
547
+ "total patches: 300\n",
548
+ "/scratch/wej36how/Datasets/NWRD/train/masks/86.png\n",
549
+ "image shape: (4608, 3456, 3)\n",
550
+ "total patches: 300\n",
551
+ "/scratch/wej36how/Datasets/NWRD/train/masks/87.png\n",
552
+ "image shape: (4608, 3456, 3)\n",
553
+ "total patches: 300\n",
554
+ "/scratch/wej36how/Datasets/NWRD/train/masks/88.png\n",
555
+ "image shape: (4608, 3456, 3)\n",
556
+ "total patches: 300\n",
557
+ "/scratch/wej36how/Datasets/NWRD/train/masks/9.png\n",
558
+ "image shape: (4000, 6016, 3)\n",
559
+ "total patches: 442\n",
560
+ "/scratch/wej36how/Datasets/NWRD/train/masks/90.png\n",
561
+ "image shape: (4608, 3456, 3)\n",
562
+ "total patches: 300\n",
563
+ "/scratch/wej36how/Datasets/NWRD/train/masks/91.png\n",
564
+ "image shape: (4608, 3456, 3)\n",
565
+ "total patches: 300\n",
566
+ "/scratch/wej36how/Datasets/NWRD/train/masks/92.png\n",
567
+ "image shape: (4608, 3456, 3)\n",
568
+ "total patches: 300\n",
569
+ "/scratch/wej36how/Datasets/NWRD/train/masks/93.png\n",
570
+ "image shape: (4608, 3456, 3)\n",
571
+ "total patches: 300\n",
572
+ "/scratch/wej36how/Datasets/NWRD/train/masks/94.png\n",
573
+ "image shape: (4608, 3456, 3)\n",
574
+ "total patches: 300\n",
575
+ "/scratch/wej36how/Datasets/NWRD/train/masks/95.png\n",
576
+ "image shape: (4608, 3456, 3)\n",
577
+ "total patches: 300\n",
578
+ "/scratch/wej36how/Datasets/NWRD/train/masks/96.png\n",
579
+ "image shape: (4608, 3456, 3)\n",
580
+ "total patches: 300\n",
581
+ "/scratch/wej36how/Datasets/NWRD/train/masks/98.png\n",
582
+ "image shape: (4608, 3456, 3)\n",
583
+ "total patches: 300\n",
584
+ "/scratch/wej36how/Datasets/NWRD/train/masks/99.png\n",
585
+ "image shape: (4608, 3456, 3)\n",
586
+ "total patches: 300\n",
587
+ "total masks count: 28523\n"
588
+ ]
589
+ }
590
+ ],
591
+ "source": [
592
+ "masks_paths = glob.glob(f'{os.path.join(source, \"masks\", \"*\")}')\n",
593
+ "images_paths = glob.glob(f'{os.path.join(source, \"images\", \"*\")}')\n",
594
+ "images_paths.sort()\n",
595
+ "masks_paths.sort()\n",
596
+ "\n",
597
+ "\n",
598
+ "os.makedirs(patches_path)\n",
599
+ "os.makedirs(images_dir)\n",
600
+ "os.makedirs(masks_dir)\n",
601
+ "\n",
602
+ "def create_patches(fname):\n",
603
+ " x = 0\n",
604
+ " y = 0\n",
605
+ " patches = []\n",
606
+ " img = cv2.imread(fname)\n",
607
+ " print(\"image shape:\",img.shape)\n",
608
+ " p_num = 0\n",
609
+ " while (y + patch_size < img.shape[0]):\n",
610
+ " \n",
611
+ " if (x + patch_size > img.shape[1]):\n",
612
+ " x = 0\n",
613
+ " y += patch_size\n",
614
+ " if y + patch_size <= img.shape[0] and x + patch_size <= img.shape[1]:\n",
615
+ " patches.append([x, y])\n",
616
+ " x += patch_size\n",
617
+ " print(\"total patches: \", len(patches))\n",
618
+ " return patches\n",
619
+ "\n",
620
+ "total_count = 0\n",
621
+ "for u in images_paths:\n",
622
+ " print(u)\n",
623
+ " patches = create_patches(u)\n",
624
+ " bgr = cv2.imread(u)\n",
625
+ " image_name = u.split('/')[-1].split('.')[0]\n",
626
+ " total_count += len(patches)\n",
627
+ "\n",
628
+ " for count, P in enumerate(patches):\n",
629
+ " cv2.imwrite(os.path.join(images_dir,f\"{image_name}_{count}.png\"), bgr[P[1]:P[1]+patch_size,P[0]:P[0]+patch_size])\n",
630
+ " \n",
631
+ "print(\"total image count:\", total_count)\n",
632
+ "\n",
633
+ "total_count = 0\n",
634
+ "for u in masks_paths:\n",
635
+ " print(u)\n",
636
+ " patches = create_patches(u)\n",
637
+ " bgr = cv2.imread(u)\n",
638
+ " image_name = u.split('/')[-1].split('.')[0]\n",
639
+ "\n",
640
+ " total_count += len(patches)\n",
641
+ "\n",
642
+ " for count, P in enumerate(patches):\n",
643
+ " cv2.imwrite(os.path.join(masks_dir,f\"{image_name}_{count}.png\"), bgr[P[1]:P[1]+patch_size,P[0]:P[0]+patch_size])\n",
644
+ " \n",
645
+ "print(\"total masks count:\", total_count)"
646
+ ]
647
+ },
648
+ {
649
+ "cell_type": "markdown",
650
+ "metadata": {},
651
+ "source": [
652
+ "This will saperate the rust and non rust patches and put them in and put them in directory destination/RustNonRustSplit"
653
+ ]
654
+ },
655
+ {
656
+ "cell_type": "code",
657
+ "execution_count": 4,
658
+ "metadata": {},
659
+ "outputs": [],
660
+ "source": [
661
+ "destination = os.path.join(dest, \"RustNonRustSplit\")\n",
662
+ "root = patches_path\n",
663
+ "\n",
664
+ "os.makedirs(destination)\n",
665
+ "os.makedirs(os.path.join(destination,\"non_rust\",\"images\"))\n",
666
+ "os.makedirs(os.path.join(destination,\"non_rust\",\"masks\"))\n",
667
+ "os.makedirs(os.path.join(destination,\"rust\",\"images\"))\n",
668
+ "os.makedirs(os.path.join(destination,\"rust\",\"masks\"))\n",
669
+ "\n",
670
+ "masks_path = os.path.join(root, \"masks\", \"*.png\")\n",
671
+ "masks_paths = glob.glob(masks_path)\n",
672
+ "minimum=1000\n",
673
+ "min_patch=0\n",
674
+ "rust_count=0\n",
675
+ "non_rust_count=0\n",
676
+ "\n",
677
+ "for mask_path in masks_paths:\n",
678
+ " patch_name = mask_path.split(\"/\")[-1].split(\".\")[0]\n",
679
+ " \n",
680
+ " patch_mask = cv2.imread(mask_path, 0)\n",
681
+ " patch_img = cv2.imread(os.path.join(root, \"images\",patch_name+\".png\"))\n",
682
+ "\n",
683
+ " condition = (patch_mask > 150)\n",
684
+ " count = np.sum(condition)\n",
685
+ " \n",
686
+ " if count<=rust_threshold:\n",
687
+ " cv2.imwrite(os.path.join(destination,\"non_rust\",\"images\",f\"{patch_name}.png\"), patch_img)\n",
688
+ " cv2.imwrite(os.path.join(destination,\"non_rust\",\"masks\",f\"{patch_name}.png\"), patch_mask)\n",
689
+ " non_rust_count+=1\n",
690
+ " else:\n",
691
+ " if (count<=minimum):\n",
692
+ " minimum=count\n",
693
+ " min_patch = patch_name\n",
694
+ " cv2.imwrite(os.path.join(destination,\"rust\",\"images\",f\"{patch_name}.png\"), patch_img)\n",
695
+ " cv2.imwrite(os.path.join(destination,\"rust\",\"masks\",f\"{patch_name}.png\"), patch_mask)\n",
696
+ " rust_count+=1\n",
697
+ "\n",
698
+ "print(\"minimum rust patch:\",min_patch)\n",
699
+ "print(\"minimum rust patch white pixels:\",minimum)\n",
700
+ "print(\"rust count=\", rust_count)\n",
701
+ "print(\"non rust count=\", non_rust_count)"
702
+ ]
703
+ },
704
+ {
705
+ "cell_type": "markdown",
706
+ "metadata": {},
707
+ "source": [
708
+ "Run the next two code snippets for training only. The following code will augment the images in the destination/RustNonRustSplit/images and destination/RustNonRustSplit/masks folder. "
709
+ ]
710
+ },
711
+ {
712
+ "cell_type": "code",
713
+ "execution_count": null,
714
+ "metadata": {},
715
+ "outputs": [],
716
+ "source": [
717
+ "#flip images horizontally\n",
718
+ "def flip_images_hor(input_image):\n",
719
+ " # Iterate over the images in the input directory\n",
720
+ " transform_hflip = T.RandomHorizontalFlip(p=1.0) # Set probability to 1.0 to always flip\n",
721
+ " return transform_hflip(input_image)\n",
722
+ "\n",
723
+ "#flip images vertically\n",
724
+ "def flip_images_ver(input_image):\n",
725
+ " # Iterate over the images in the input directory\n",
726
+ " transform_vflip = T.RandomVerticalFlip(p=1.0) # Set probability to 1.0 to always flip\n",
727
+ " return transform_vflip(input_image) \n",
728
+ " \n",
729
+ "def shear_vertical(input_image, shear_factor=45):\n",
730
+ " # Apply vertical shear\n",
731
+ " sheared_image = TF.affine(input_image, angle=0, translate=(0, 0), scale=1, shear=(0, shear_factor))\n",
732
+ " return sheared_image\n",
733
+ "\n",
734
+ "def shear_horizontal(input_image, shear_factor=45): # Increased shear for testing\n",
735
+ " sheared_image = TF.affine(input_image, angle=0, translate=(0, 0), scale=1, shear=(shear_factor, 0))\n",
736
+ " return sheared_image\n",
737
+ "\n",
738
+ "def rotate_images(input_image, angle=45):\n",
739
+ " # Convert PIL Image to NumPy array\n",
740
+ " input_array = np.array(input_image)\n",
741
+ " # Rotate the image\n",
742
+ " height, width = input_array.shape[:2]\n",
743
+ " rotation_matrix = cv2.getRotationMatrix2D((width / 2, height / 2), angle, 1)\n",
744
+ " rotated_array = cv2.warpAffine(input_array, rotation_matrix, (width, height))\n",
745
+ " # Convert NumPy array back to PIL Image\n",
746
+ " rotated_image = Image.fromarray(rotated_array)\n",
747
+ " return rotated_image\n",
748
+ "\n",
749
+ "def dark(input_image,gamma):\n",
750
+ " dark_image= TF.adjust_gamma(input_image, gamma)\n",
751
+ " return dark_image\n",
752
+ "\n",
753
+ "def augment_image(img_path):\n",
754
+ "\n",
755
+ " # Apply the transformations\n",
756
+ " \n",
757
+ " #orig_image\n",
758
+ " orig_img = Image.open(Path(img_path))\n",
759
+ " \n",
760
+ " #flip images\n",
761
+ " img_hflipped = flip_images_hor(orig_img)\n",
762
+ " img_vflipped = flip_images_ver(orig_img)\n",
763
+ " \n",
764
+ " \n",
765
+ " #shear images\n",
766
+ " hor_shear = shear_horizontal(orig_img)\n",
767
+ " ver_shear = shear_vertical(orig_img)\n",
768
+ " \n",
769
+ " #dark\n",
770
+ " img_dark = dark(img_hflipped, 2)\n",
771
+ " img_rot = rotate_images(orig_img, angle=45)\n",
772
+ " \n",
773
+ " return [img_dark,img_hflipped,img_vflipped,hor_shear,ver_shear, img_rot]\n",
774
+ "\n",
775
+ "def creating_file_with_augmented_images(file_path_master_dataset, file_path_augmented_images):\n",
776
+ " master_dataset_folder = file_path_master_dataset\n",
777
+ " files_in_master_dataset = os.listdir(file_path_master_dataset)\n",
778
+ " augmented_images_folder = file_path_augmented_images\n",
779
+ " \n",
780
+ " for image_name in files_in_master_dataset:\n",
781
+ " image_path = os.path.join(master_dataset_folder, image_name)\n",
782
+ " required_images = augment_image(image_path) # Assuming augment_image is defined elsewhere\n",
783
+ " i = 0\n",
784
+ " for augmented_image in required_images:\n",
785
+ " # Convert RGBA to RGB if necessary\n",
786
+ " if augmented_image.mode == 'RGBA':\n",
787
+ " augmented_image = augmented_image.convert('RGB')\n",
788
+ " \n",
789
+ " # Save as png\n",
790
+ " augmented_image_path = os.path.join(augmented_images_folder, f\"aug{i}_{image_name}\")\n",
791
+ " augmented_image.save(augmented_image_path, format='png')\n",
792
+ " i += 1\n",
793
+ "\n",
794
+ "master_dataset = os.path.join(destination,\"rust\",\"images\")\n",
795
+ "augmented_dataset = os.path.join(destination,\"rust\",\"images\")\n",
796
+ "creating_file_with_augmented_images(master_dataset,augmented_dataset)\n",
797
+ "\n",
798
+ "master_dataset = os.path.join(destination,\"rust\",\"masks\")\n",
799
+ "augmented_dataset = os.path.join(destination,\"rust\",\"masks\")\n",
800
+ "creating_file_with_augmented_images(master_dataset,augmented_dataset)"
801
+ ]
802
+ },
803
+ {
804
+ "cell_type": "markdown",
805
+ "metadata": {},
806
+ "source": [
807
+ "Run next snippet only for training dataset. To remove patches that have their rust removed becuase of their augmentations"
808
+ ]
809
+ },
810
+ {
811
+ "cell_type": "code",
812
+ "execution_count": null,
813
+ "metadata": {},
814
+ "outputs": [
815
+ {
816
+ "name": "stdout",
817
+ "output_type": "stream",
818
+ "text": [
819
+ "minimum rust patch: 0\n",
820
+ "minimum rust patch white pixels: 1000\n",
821
+ "rust count= 0\n",
822
+ "non rust count= 0\n"
823
+ ]
824
+ }
825
+ ],
826
+ "source": [
827
+ "root = os.path.join(destination,\"rust\")\n",
828
+ "\n",
829
+ "non_rust_images_dir = os.path.join(destination,\"non_rust\",\"images\")\n",
830
+ "non_rust_masks_dir = os.path.join(destination,\"non_rust\",\"masks\")\n",
831
+ "\n",
832
+ "masks_path = os.path.join(root, \"masks\", \"*.png\")\n",
833
+ "masks_paths = glob.glob(masks_path)\n",
834
+ "minimum=1000\n",
835
+ "min_patch=0\n",
836
+ "rust_count=0\n",
837
+ "non_rust_count=0\n",
838
+ "\n",
839
+ "for mask_path in masks_paths:\n",
840
+ " patch_name = mask_path.split(\"/\")[-1].split(\".\")[0]\n",
841
+ " \n",
842
+ " patch_mask = cv2.imread(mask_path, 0)\n",
843
+ " patch_img = cv2.imread(os.path.join(root, \"images\",patch_name+\".png\"))\n",
844
+ "\n",
845
+ " condition = (patch_mask > 150)\n",
846
+ " count = np.sum(condition)\n",
847
+ " \n",
848
+ " if count<=rust_threshold:\n",
849
+ " os.remove(mask_path)\n",
850
+ " os.remove(os.path.join(root, \"images\",patch_name+\".png\"))\n",
851
+ "\n",
852
+ " cv2.imwrite(os.path.join(non_rust_images_dir,f\"{patch_name}.png\"), patch_img)\n",
853
+ " cv2.imwrite(os.path.join(non_rust_masks_dir,f\"{patch_name}.png\"), patch_mask)\n",
854
+ " non_rust_count+=1\n",
855
+ " else:\n",
856
+ " if (count<=minimum):\n",
857
+ " minimum=count\n",
858
+ " min_patch = patch_name\n",
859
+ " # cv2.imwrite(os.path.join(destination,\"rust\",\"images\",f\"{patch_name}.png\"), patch_img)\n",
860
+ " # cv2.imwrite(os.path.join(destination,\"rust\",\"masks\",f\"{patch_name}.png\"), patch_mask)\n",
861
+ " rust_count+=1\n",
862
+ "\n",
863
+ "print(\"minimum rust patch:\",min_patch)\n",
864
+ "print(\"minimum rust patch white pixels:\",minimum)\n",
865
+ "print(\"rust count=\", rust_count)\n",
866
+ "print(\"non rust count=\", non_rust_count)"
867
+ ]
868
+ },
869
+ {
870
+ "cell_type": "markdown",
871
+ "metadata": {},
872
+ "source": [
873
+ "Create a dataset for classification model"
874
+ ]
875
+ },
876
+ {
877
+ "cell_type": "code",
878
+ "execution_count": null,
879
+ "metadata": {},
880
+ "outputs": [
881
+ {
882
+ "data": {
883
+ "text/plain": [
884
+ "'C:\\\\Users\\\\hasee\\\\Desktop\\\\Germany_2024\\\\Dataset\\\\NWRDFprocessed\\\\train\\\\calssification\\\\non_rust'"
885
+ ]
886
+ },
887
+ "execution_count": 7,
888
+ "metadata": {},
889
+ "output_type": "execute_result"
890
+ }
891
+ ],
892
+ "source": [
893
+ "rust_images_dir = os.path.join(destination,\"rust\",\"images\")\n",
894
+ "non_rust_images_dir = os.path.join(destination,\"non_rust\",\"images\")\n",
895
+ "\n",
896
+ "rustClassificationDir = os.path.join(dest, \"calssification\", \"rust\")\n",
897
+ "nonRustClassificationDir = os.path.join(dest, \"calssification\", \"non_rust\")\n",
898
+ "os.makedirs(rustClassificationDir, exist_ok=True)\n",
899
+ "os.makedirs(nonRustClassificationDir, exist_ok=True)\n",
900
+ "\n",
901
+ "shutil.copytree(rust_images_dir,rustClassificationDir, dirs_exist_ok=True)\n",
902
+ "shutil.copytree(non_rust_images_dir,nonRustClassificationDir, dirs_exist_ok=True)\n"
903
+ ]
904
+ },
905
+ {
906
+ "cell_type": "markdown",
907
+ "metadata": {},
908
+ "source": [
909
+ "Run the next code snippet for training dataset only. It deletes non-rust patches to match rust patches in the classification folder only."
910
+ ]
911
+ },
912
+ {
913
+ "cell_type": "code",
914
+ "execution_count": null,
915
+ "metadata": {},
916
+ "outputs": [],
917
+ "source": [
918
+ "import os\n",
919
+ "import glob\n",
920
+ "\n",
921
+ "def delete_extra_images(directory, target_count):\n",
922
+ " # Get a list of all image files in the directory\n",
923
+ " image_files = glob.glob(os.path.join(directory, '*.JPG')) + glob.glob(os.path.join(directory, '*.jpeg')) + glob.glob(os.path.join(directory, '*.png'))\n",
924
+ " \n",
925
+ " # Check if the number of images exceeds the target count\n",
926
+ " if len(image_files) > target_count:\n",
927
+ " # Calculate the number of images to delete\n",
928
+ " num_to_delete = len(image_files) - target_count\n",
929
+ " # Sort the images by modification time (oldest first)\n",
930
+ " image_files.sort(key=os.path.getmtime)\n",
931
+ " # Delete the extra images\n",
932
+ " for i in range(num_to_delete):\n",
933
+ " os.remove(image_files[i])\n",
934
+ " print(f\"{num_to_delete} images deleted.\")\n",
935
+ " elif len(image_files) < target_count:\n",
936
+ " print(\"Warning: Number of images in directory is less than the target count.\")\n",
937
+ "\n",
938
+ "if len(os.listdir(rustClassificationDir))< len(os.listdir(nonRustClassificationDir)):\n",
939
+ " delete_extra_images(nonRustClassificationDir, len(os.listdir(rustClassificationDir)))\n",
940
+ "else:\n",
941
+ " delete_extra_images(rustClassificationDir, len(os.listdir(nonRustClassificationDir)))\n"
942
+ ]
943
+ },
944
+ {
945
+ "cell_type": "markdown",
946
+ "metadata": {},
947
+ "source": [
948
+ "The following code creates a coslaiency style structure for co-saliency models training"
949
+ ]
950
+ },
951
+ {
952
+ "cell_type": "code",
953
+ "execution_count": null,
954
+ "metadata": {},
955
+ "outputs": [],
956
+ "source": [
957
+ "rust_dir = os.path.join(destination,\"rust\")\n",
958
+ "rustCosaliencynDir = os.path.join(dest, \"cosaliency\")\n",
959
+ "shutil.copytree(rust_dir,rustCosaliencynDir, dirs_exist_ok=True)\n",
960
+ "\n",
961
+ "# Function to split images into folders based on image number\n",
962
+ "def split_images_into_folders(source_dir, destination_dir):\n",
963
+ " # Create destination directory if it doesn't exist\n",
964
+ " if not os.path.exists(destination_dir):\n",
965
+ " os.makedirs(destination_dir)\n",
966
+ " # Iterate through files in the source directory\n",
967
+ " for filename in os.listdir(source_dir):\n",
968
+ " if filename.endswith('.png'):\n",
969
+ " image_no = filename.split('_')[0] # Extract image number from filename\n",
970
+ " if not image_no.isdigit():\n",
971
+ " image_no = filename.split('_')[1]\n",
972
+ " destination_subdir = os.path.join(destination_dir, image_no)\n",
973
+ " # Create subdirectory if it doesn't exist\n",
974
+ " if not os.path.exists(destination_subdir):\n",
975
+ " os.makedirs(destination_subdir)\n",
976
+ " # Move the image file to the respective subdirectory\n",
977
+ " shutil.move(os.path.join(source_dir, filename), destination_subdir)\n",
978
+ "\n",
979
+ "def organize_images(main_directory):\n",
980
+ " # Ensure the main directory exists\n",
981
+ " if not os.path.exists(main_directory):\n",
982
+ " print(f\"The specified main directory '{main_directory}' does not exist.\")\n",
983
+ " return\n",
984
+ "\n",
985
+ " # Get a list of subdirectories in the main directory\n",
986
+ " subdirectories = [d for d in os.listdir(main_directory) if os.path.isdir(os.path.join(main_directory, d))]\n",
987
+ "\n",
988
+ " # Process each subdirectory\n",
989
+ " for subdir in subdirectories:\n",
990
+ " subdir_path = os.path.join(main_directory, subdir)\n",
991
+ "\n",
992
+ " # Get a list of images in the subdirectory\n",
993
+ " images = [f for f in os.listdir(subdir_path) if f.lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))]\n",
994
+ " # Determine the number of images per subdirectory\n",
995
+ " images_per_subdir = 12\n",
996
+ " num_subdirectories = len(images) // images_per_subdir\n",
997
+ " n=0\n",
998
+ " # Create additional subdirectories if needed\n",
999
+ " for i in range(num_subdirectories - 1):\n",
1000
+ " new_subdir_name = f\"{subdir}_part{i + 1}\"\n",
1001
+ " new_subdir_path = os.path.join(main_directory, new_subdir_name)\n",
1002
+ "\n",
1003
+ " # Create the new subdirectory\n",
1004
+ " os.makedirs(new_subdir_path)\n",
1005
+ "\n",
1006
+ " # Move images to the new subdirectory\n",
1007
+ " for j in range(images_per_subdir):\n",
1008
+ " old_image_path = os.path.join(subdir_path, images[n])\n",
1009
+ " new_image_path = os.path.join(new_subdir_path, images[n])\n",
1010
+ " shutil.move(old_image_path, new_image_path)\n",
1011
+ " n+=1\n",
1012
+ "\n",
1013
+ "source_directory = os.path.join(dest, \"cosaliency\", \"images\")\n",
1014
+ "destination_directory = os.path.join(dest, \"cosaliency\", \"images\")\n",
1015
+ "split_images_into_folders(source_directory, destination_directory)\n",
1016
+ "organize_images(destination_directory)\n",
1017
+ "\n",
1018
+ "source_directory = os.path.join(dest, \"cosaliency\", \"masks\")\n",
1019
+ "destination_directory = os.path.join(dest, \"cosaliency\", \"masks\")\n",
1020
+ "split_images_into_folders(source_directory, destination_directory)\n",
1021
+ "organize_images(destination_directory)"
1022
+ ]
1023
+ }
1024
+ ],
1025
+ "metadata": {
1026
+ "kernelspec": {
1027
+ "display_name": "segformer",
1028
+ "language": "python",
1029
+ "name": "python3"
1030
+ },
1031
+ "language_info": {
1032
+ "codemirror_mode": {
1033
+ "name": "ipython",
1034
+ "version": 3
1035
+ },
1036
+ "file_extension": ".py",
1037
+ "mimetype": "text/x-python",
1038
+ "name": "python",
1039
+ "nbconvert_exporter": "python",
1040
+ "pygments_lexer": "ipython3",
1041
+ "version": "3.8.0"
1042
+ }
1043
+ },
1044
+ "nbformat": 4,
1045
+ "nbformat_minor": 2
1046
+ }
evaluator.cpython-37.pyc ADDED
Binary file (12.3 kB). View file
 
evaluator.py ADDED
@@ -0,0 +1,490 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import json
4
+
5
+ import numpy as np
6
+ from scipy.io import savemat
7
+ import torch
8
+ from torchvision import transforms
9
+
10
+ from PIL import ImageFile
11
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
12
+
13
+
14
+ class Eval_thread():
15
def __init__(self, loader, method='', dataset='', output_dir='', epoch='', cuda=True):
    """Bind the prediction/ground-truth loader and bookkeeping fields for one evaluation run."""
    self.loader = loader
    self.method = method
    self.dataset = dataset
    self.cuda = cuda
    self.output_dir = output_dir
    # Keep only the numeric part of an 'ep<N>' tag, e.g. 'ep12' -> '12'.
    self.epoch = epoch.split('ep')[-1]
    # Every run appends its summary line to this shared result file.
    self.logfile = os.path.join(output_dir, 'result.txt')
    # S_measures of GCoNet
    self.dataset2smeasure_bottom_bound = {'CoCA': 0.673, 'CoSOD3k': 0.802, 'CoSal2015': 0.845}
24
+
25
def run(self, AP=False, AUC=False, save_metrics=False, continue_eval=True):
    """Evaluate the loaded predictions, log a one-line summary, optionally save a .mat dump.

    Args:
        AP: also derive average precision from the precision/recall curves.
        AUC: also compute TPR/FPR and the area under the ROC curve.
        save_metrics: write all metrics to <output_dir>/<method>/<epoch>/<dataset>.mat.
        continue_eval: when False, skip evaluation entirely and report placeholder values.

    Returns:
        (info, continue_eval): the formatted summary string prefixed with the elapsed
        time, and the possibly-updated continue_eval flag — it turns False once the
        S-measure falls below this dataset's bottom bound.
    """
    Res = {}
    start_time = time.time()

    # Placeholder values used when evaluation is skipped or the S-measure is
    # below the dataset's bottom bound (identical to the originals' defaults).
    s = 0
    mae = 1
    Em = torch.zeros(255).cpu().numpy()
    max_e = 0
    mean_e = 0
    Fm, prec, recall = 0, 0, 0
    max_f = 0
    mean_f = 0

    if continue_eval:
        s = self.Eval_Smeasure()
        if s > self.dataset2smeasure_bottom_bound[self.dataset]:
            # Model is promising enough: run the remaining, more expensive metrics.
            mae = self.Eval_mae()
            Em = self.Eval_Emeasure()
            max_e = Em.max().item()
            mean_e = Em.mean().item()
            Em = Em.cpu().numpy()
            Fm, prec, recall = self.Eval_fmeasure()
            max_f = Fm.max().item()
            mean_f = Fm.mean().item()
            Fm = Fm.cpu().numpy()
        else:
            # Below the bound: keep placeholders and stop evaluating later epochs.
            continue_eval = False

    if AP:
        # BUG FIX: prec/recall are plain ints (0) when evaluation was skipped;
        # the original unconditionally called `.cpu()` here and crashed with
        # AttributeError. Only tensors carry real PR curves.
        if torch.is_tensor(prec):
            prec = prec.cpu().numpy()
            recall = recall.cpu().numpy()
            avg_p = self.Eval_AP(prec, recall)
        else:
            avg_p = 0  # evaluation skipped — report a placeholder AP

    if AUC:
        auc, TPR, FPR = self.Eval_auc()
        TPR = TPR.cpu().numpy()
        FPR = FPR.cpu().numpy()

    if save_metrics:
        save_dir = os.path.join(self.output_dir, self.method, self.epoch)
        os.makedirs(save_dir, exist_ok=True)
        # The placeholder defaults above equal the literal values the original
        # stored in its skipped branch, so the variables can be saved directly.
        Res['Sm'] = s
        Res['MAE'] = mae
        Res['MaxEm'] = max_e
        Res['MeanEm'] = mean_e
        Res['Em'] = Em
        Res['Fm'] = Fm

        if AP:
            Res['MaxFm'] = max_f
            Res['MeanFm'] = mean_f
            Res['AP'] = avg_p
            Res['Prec'] = prec
            Res['Recall'] = recall

        if AUC:
            Res['AUC'] = auc
            Res['TPR'] = TPR
            Res['FPR'] = FPR

        savemat(os.path.join(save_dir, self.dataset + '.mat'), Res)

    info = '{} ({}): {:.4f} max-Emeasure || {:.4f} S-measure || {:.4f} max-fm || {:.4f} mae || {:.4f} mean-Emeasure || {:.4f} mean-fm'.format(
        self.dataset, self.method + '-ep{}'.format(self.epoch), max_e, s, max_f, mae, mean_e, mean_f
    )
    if AP:
        info += ' || {:.4f} AP'.format(avg_p)
    if AUC:
        info += ' || {:.4f} AUC'.format(auc)
    info += '.'
    self.LOG(info + '\n')

    return '[cost:{:.4f}s] '.format(time.time() - start_time) + info, continue_eval
114
+
115
def Eval_mae(self):
    """Mean absolute error between predictions and ground truths.

    Averages |pred - gt| per sample over ``self.loader`` (which yields
    (pred, gt) image pairs), skipping NaN per-image results. Returns 1.0
    (the worst-case MAE used elsewhere in this evaluator) when no valid
    sample was seen — the original code raised ZeroDivisionError on an
    empty loader.
    """
    if self.epoch:
        print('Evaluating MAE...')
    avg_mae, img_num = 0.0, 0.0
    with torch.no_grad():
        trans = transforms.Compose([transforms.ToTensor()])
        for pred, gt in self.loader:
            pred, gt = trans(pred), trans(gt)
            if self.cuda:
                pred, gt = pred.cuda(), gt.cuda()
            mea = torch.abs(pred - gt).mean()
            if mea == mea:  # NaN != NaN: skip NaN per-image errors
                avg_mae += mea
                img_num += 1.0
    # Fix: guard against division by zero (empty loader / all-NaN case).
    if img_num == 0:
        return 1.0
    return (avg_mae / img_num).item()
134
+
135
def Eval_fmeasure(self):
    """Thresholded F-measure plus precision/recall curves.

    Returns (Fm, avg_prec, avg_recall), each a 255-point curve averaged
    over the loader; predictions are min-max normalised into [0, 1] first.
    """
    print('Evaluating FMeasure...')
    beta2 = 0.3
    f_sum, p_sum, r_sum, count = 0.0, 0.0, 0.0, 0.0
    to_tensor = transforms.Compose([transforms.ToTensor()])
    with torch.no_grad():
        for pred, gt in self.loader:
            pred, gt = to_tensor(pred), to_tensor(gt)
            if self.cuda:
                pred, gt = pred.cuda(), gt.cuda()
            # min-max normalise the prediction map into [0, 1]
            pred = (pred - torch.min(pred)) / (torch.max(pred) - torch.min(pred) + 1e-20)
            prec, recall = self._eval_pr(pred, gt, 255)
            f_score = (1 + beta2) * prec * recall / (beta2 * prec + recall)
            f_score[f_score != f_score] = 0  # 0/0 produces NaN: treat as 0
            f_sum += f_score
            p_sum += prec
            r_sum += recall
            count += 1.0
    return f_sum / count, p_sum / count, r_sum / count
164
+
165
def Eval_auc(self):
    """ROC AUC plus the averaged TPR/FPR curves over the loader."""
    print('Evaluating AUC...')
    tpr_sum, fpr_sum, count = 0.0, 0.0, 0.0
    to_tensor = transforms.Compose([transforms.ToTensor()])
    with torch.no_grad():
        for pred, gt in self.loader:
            pred, gt = to_tensor(pred), to_tensor(gt)
            if self.cuda:
                pred, gt = pred.cuda(), gt.cuda()
            pred = (pred - torch.min(pred)) / (torch.max(pred) - torch.min(pred) + 1e-20)
            tpr, fpr = self._eval_roc(pred, gt, 255)
            tpr_sum += tpr
            fpr_sum += fpr
            count += 1.0
        avg_tpr = tpr_sum / count
        avg_fpr = fpr_sum / count
        # Sort by FPR so the trapezoidal integration is well defined.
        order = torch.argsort(avg_fpr)
        avg_tpr = avg_tpr[order]
        avg_fpr = avg_fpr[order]
        avg_auc = torch.trapz(avg_tpr, avg_fpr)
    return avg_auc.item(), avg_tpr, avg_fpr
196
+
197
def Eval_Emeasure(self):
    """E-measure curve (255 thresholds) averaged over the loader."""
    print('Evaluating EMeasure...')
    to_tensor = transforms.Compose([transforms.ToTensor()])
    with torch.no_grad():
        Em = torch.zeros(255).cuda() if self.cuda else torch.zeros(255)
        count = 0.0
        for pred, gt in self.loader:
            pred, gt = to_tensor(pred), to_tensor(gt)
            if self.cuda:
                pred, gt = pred.cuda(), gt.cuda()
            pred = (pred - torch.min(pred)) / (torch.max(pred) - torch.min(pred) + 1e-20)
            Em += self._eval_e(pred, gt, 255)
            count += 1.0
        Em /= count
    return Em
221
+
222
def select_by_Smeasure(self, bar=0.9, loader_comp=None, bar_comp=0.1):
    """Select samples whose S-measure clears *bar* and beats a comparison
    method by at least *bar_comp*.

    Iterates ``self.loader`` and ``loader_comp`` in lockstep (they must
    yield the same samples in the same order), scores each prediction
    against its ground truth with the S-measure, and collects the paths of
    the predictions where Q > bar and Q - Q_comp > bar_comp.

    Returns (avg_q, good_pred_paths, good_comp_pred_paths, good_gt_paths).
    """
    print('Evaluating SMeasure...')
    good_ones = []
    good_ones_comp = []
    good_ones_gt = []
    alpha, avg_q, img_num = 0.5, 0.0, 0.0
    with torch.no_grad():
        trans = transforms.Compose([transforms.ToTensor()])
        for (pred, gt, predpath, gtpath), (pred_comp, gt_comp, predpath_comp) in zip(self.loader, loader_comp):
            # pred X gt
            if self.cuda:
                pred = trans(pred).cuda()
                # min-max normalise the prediction into [0, 1]
                pred = (pred - torch.min(pred)) / (torch.max(pred) -
                                                   torch.min(pred) + 1e-20)
                gt = trans(gt).cuda()
            else:
                pred = trans(pred)
                pred = (pred - torch.min(pred)) / (torch.max(pred) -
                                                   torch.min(pred) + 1e-20)
                gt = trans(gt)
            y = gt.mean()
            if y == 0:
                # all-background GT: reward low predictions
                x = pred.mean()
                Q = 1.0 - x
            elif y == 1:
                # all-foreground GT: reward high predictions
                x = pred.mean()
                Q = x
            else:
                # binarise the GT, then combine object- and region-aware scores
                gt[gt >= 0.5] = 1
                gt[gt < 0.5] = 0
                Q = alpha * self._S_object(
                    pred, gt) + (1 - alpha) * self._S_region(pred, gt)
                if Q.item() < 0:
                    Q = torch.FloatTensor([0.0])
            img_num += 1.0
            avg_q += Q.item()
            # pred_comp X gt — identical scoring for the comparison method
            if self.cuda:
                pred_comp = trans(pred_comp).cuda()
                pred_comp = (pred_comp - torch.min(pred_comp)) / (torch.max(pred_comp) -
                                                                  torch.min(pred_comp) + 1e-20)
                gt_comp = trans(gt_comp).cuda()
            else:
                pred_comp = trans(pred_comp)
                pred_comp = (pred_comp - torch.min(pred_comp)) / (torch.max(pred_comp) -
                                                                  torch.min(pred_comp) + 1e-20)
                gt_comp = trans(gt_comp)
            y = gt_comp.mean()
            if y == 0:
                x = pred_comp.mean()
                Q_comp = 1.0 - x
            elif y == 1:
                x = pred_comp.mean()
                Q_comp = x
            else:
                gt_comp[gt_comp >= 0.5] = 1
                gt_comp[gt_comp < 0.5] = 0
                Q_comp = alpha * self._S_object(
                    pred_comp, gt_comp) + (1 - alpha) * self._S_region(pred_comp, gt_comp)
                if Q_comp.item() < 0:
                    Q_comp = torch.FloatTensor([0.0])
            # keep the sample if our score clears the bar AND beats the
            # comparison method by the required margin
            if Q.item() > bar and (Q.item() - Q_comp.item()) > bar_comp:
                good_ones.append(predpath)
                good_ones_comp.append(predpath_comp)
                good_ones_gt.append(gtpath)
        avg_q /= img_num
    return avg_q, good_ones, good_ones_comp, good_ones_gt
289
+
290
def Eval_Smeasure(self):
    """Structure measure (S-measure) averaged over the loader.

    S = alpha * S_object + (1 - alpha) * S_region; the degenerate
    all-background / all-foreground ground truths are scored directly from
    the mean prediction.
    """
    print('Evaluating SMeasure...')
    alpha = 0.5
    total, count = 0.0, 0.0
    to_tensor = transforms.Compose([transforms.ToTensor()])
    with torch.no_grad():
        for pred, gt in self.loader:
            pred, gt = to_tensor(pred), to_tensor(gt)
            if self.cuda:
                pred, gt = pred.cuda(), gt.cuda()
            pred = (pred - torch.min(pred)) / (torch.max(pred) - torch.min(pred) + 1e-20)
            y = gt.mean()
            if y == 0:
                # GT is all background: reward low predictions
                Q = 1.0 - pred.mean()
            elif y == 1:
                # GT is all foreground: reward high predictions
                Q = pred.mean()
            else:
                gt[gt >= 0.5] = 1
                gt[gt < 0.5] = 0
                Q = alpha * self._S_object(pred, gt) + (1 - alpha) * self._S_region(pred, gt)
                if Q.item() < 0:
                    Q = torch.FloatTensor([0.0])
            count += 1.0
            total += Q.item()
    return total / count
324
+
325
def LOG(self, output):
    """Append *output* to the run's log file, creating the output directory first."""
    os.makedirs(self.output_dir, exist_ok=True)
    with open(self.logfile, 'a') as log:
        log.write(output)
329
+
330
+ def _eval_e(self, y_pred, y, num):
331
+ if self.cuda:
332
+ score = torch.zeros(num).cuda()
333
+ thlist = torch.linspace(0, 1 - 1e-10, num).cuda()
334
+ else:
335
+ score = torch.zeros(num)
336
+ thlist = torch.linspace(0, 1 - 1e-10, num)
337
+ for i in range(num):
338
+ y_pred_th = (y_pred >= thlist[i]).float()
339
+ fm = y_pred_th - y_pred_th.mean()
340
+ gt = y - y.mean()
341
+ align_matrix = 2 * gt * fm / (gt * gt + fm * fm + 1e-20)
342
+ enhanced = ((align_matrix + 1) * (align_matrix + 1)) / 4
343
+ score[i] = torch.sum(enhanced) / (y.numel() - 1 + 1e-20)
344
+ return score
345
+
346
+ def _eval_pr(self, y_pred, y, num):
347
+ if self.cuda:
348
+ prec, recall = torch.zeros(num).cuda(), torch.zeros(num).cuda()
349
+ thlist = torch.linspace(0, 1 - 1e-10, num).cuda()
350
+ else:
351
+ prec, recall = torch.zeros(num), torch.zeros(num)
352
+ thlist = torch.linspace(0, 1 - 1e-10, num)
353
+ for i in range(num):
354
+ y_temp = (y_pred >= thlist[i]).float()
355
+ tp = (y_temp * y).sum()
356
+ prec[i], recall[i] = tp / (y_temp.sum() + 1e-20), tp / (y.sum() + 1e-20)
357
+ return prec, recall
358
+
359
+ def _eval_roc(self, y_pred, y, num):
360
+ if self.cuda:
361
+ TPR, FPR = torch.zeros(num).cuda(), torch.zeros(num).cuda()
362
+ thlist = torch.linspace(0, 1 - 1e-10, num).cuda()
363
+ else:
364
+ TPR, FPR = torch.zeros(num), torch.zeros(num)
365
+ thlist = torch.linspace(0, 1 - 1e-10, num)
366
+ for i in range(num):
367
+ y_temp = (y_pred >= thlist[i]).float()
368
+ tp = (y_temp * y).sum()
369
+ fp = (y_temp * (1 - y)).sum()
370
+ tn = ((1 - y_temp) * (1 - y)).sum()
371
+ fn = ((1 - y_temp) * y).sum()
372
+
373
+ TPR[i] = tp / (tp + fn + 1e-20)
374
+ FPR[i] = fp / (fp + tn + 1e-20)
375
+
376
+ return TPR, FPR
377
+
378
def _S_object(self, pred, gt):
    """Object-aware structural similarity.

    Scores the prediction inside the GT foreground and the inverted
    prediction inside the GT background, weighted by the foreground ratio.
    """
    fg = torch.where(gt == 0, torch.zeros_like(pred), pred)
    bg = torch.where(gt == 1, torch.zeros_like(pred), 1 - pred)
    u = gt.mean()
    return u * self._object(fg, gt) + (1 - u) * self._object(bg, 1 - gt)
386
+
387
+ def _object(self, pred, gt):
388
+ temp = pred[gt == 1]
389
+ x = temp.mean()
390
+ sigma_x = temp.std()
391
+ score = 2.0 * x / (x * x + 1.0 + sigma_x + 1e-20)
392
+
393
+ return score
394
+
395
def _S_region(self, pred, gt):
    """Region-aware structural similarity.

    SSIM over the four quadrants around the GT centroid, each weighted by
    its relative area.
    """
    X, Y = self._centroid(gt)
    gt1, gt2, gt3, gt4, w1, w2, w3, w4 = self._divideGT(gt, X, Y)
    p1, p2, p3, p4 = self._dividePrediction(pred, X, Y)
    return (w1 * self._ssim(p1, gt1) + w2 * self._ssim(p2, gt2)
            + w3 * self._ssim(p3, gt3) + w4 * self._ssim(p4, gt4))
405
+
406
+ def _centroid(self, gt):
407
+ rows, cols = gt.size()[-2:]
408
+ gt = gt.view(rows, cols)
409
+ if gt.sum() == 0:
410
+ if self.cuda:
411
+ X = torch.eye(1).cuda() * round(cols / 2)
412
+ Y = torch.eye(1).cuda() * round(rows / 2)
413
+ else:
414
+ X = torch.eye(1) * round(cols / 2)
415
+ Y = torch.eye(1) * round(rows / 2)
416
+ else:
417
+ total = gt.sum()
418
+ if self.cuda:
419
+ i = torch.from_numpy(np.arange(0, cols)).cuda().float()
420
+ j = torch.from_numpy(np.arange(0, rows)).cuda().float()
421
+ else:
422
+ i = torch.from_numpy(np.arange(0, cols)).float()
423
+ j = torch.from_numpy(np.arange(0, rows)).float()
424
+ X = torch.round((gt.sum(dim=0) * i).sum() / total + 1e-20)
425
+ Y = torch.round((gt.sum(dim=1) * j).sum() / total + 1e-20)
426
+ return X.long(), Y.long()
427
+
428
+ def _divideGT(self, gt, X, Y):
429
+ h, w = gt.size()[-2:]
430
+ area = h * w
431
+ gt = gt.view(h, w)
432
+ LT = gt[:Y, :X]
433
+ RT = gt[:Y, X:w]
434
+ LB = gt[Y:h, :X]
435
+ RB = gt[Y:h, X:w]
436
+ X = X.float()
437
+ Y = Y.float()
438
+ w1 = X * Y / area
439
+ w2 = (w - X) * Y / area
440
+ w3 = X * (h - Y) / area
441
+ w4 = 1 - w1 - w2 - w3
442
+ return LT, RT, LB, RB, w1, w2, w3, w4
443
+
444
+ def _dividePrediction(self, pred, X, Y):
445
+ h, w = pred.size()[-2:]
446
+ pred = pred.view(h, w)
447
+ LT = pred[:Y, :X]
448
+ RT = pred[:Y, X:w]
449
+ LB = pred[Y:h, :X]
450
+ RB = pred[Y:h, X:w]
451
+ return LT, RT, LB, RB
452
+
453
+ def _ssim(self, pred, gt):
454
+ gt = gt.float()
455
+ h, w = pred.size()[-2:]
456
+ N = h * w
457
+ x = pred.mean()
458
+ y = gt.mean()
459
+ sigma_x2 = ((pred - x) * (pred - x)).sum() / (N - 1 + 1e-20)
460
+ sigma_y2 = ((gt - y) * (gt - y)).sum() / (N - 1 + 1e-20)
461
+ sigma_xy = ((pred - x) * (gt - y)).sum() / (N - 1 + 1e-20)
462
+
463
+ aplha = 4 * x * y * sigma_xy
464
+ beta = (x * x + y * y) * (sigma_x2 + sigma_y2)
465
+
466
+ if aplha != 0:
467
+ Q = aplha / (beta + 1e-20)
468
+ elif aplha == 0 and beta == 0:
469
+ Q = 1.0
470
+ else:
471
+ Q = 0
472
+ return Q
473
+
474
def Eval_AP(self, prec, recall):
    """Average precision from the interpolated PR curve (VOC-style).

    Ref:
    https://github.com/facebookresearch/Detectron/blob/05d04d3a024f0991339de45872d02f2f50669b3d/lib/datasets/voc_eval.py#L54
    """
    print('Evaluating AP...')
    ap_r = np.concatenate(([0.], recall, [1.]))
    ap_p = np.concatenate(([0.], prec, [0.]))
    order = np.argsort(ap_r)
    ap_r, ap_p = ap_r[order], ap_p[order]
    # Make precision monotonically non-increasing from right to left.
    for i in range(ap_r.shape[0] - 1, 0, -1):
        ap_p[i - 1] = max(ap_p[i], ap_p[i - 1])
    # Integrate the step-wise curve where recall actually changes.
    idx = np.where(ap_r[1:] != ap_r[:-1])[0]
    return np.sum((ap_r[idx + 1] - ap_r[idx]) * ap_p[idx + 1])
hist_of_pixel_values.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import numpy as np
import cv2
import matplotlib.pyplot as plt


def _plot_margin_hist(values, low_inclusive, title_prefix, out_name):
    """Histogram *values* and report the share of 'margin' pixels.

    Margin pixels are those > 230 plus those at the low end (<= 0 when
    *low_inclusive*, strictly < 0 otherwise). Saves the figure to *out_name*.
    """
    low_count = np.sum(values <= 0) if low_inclusive else np.sum(values < 0)
    margin_values_percent = (np.sum(values > 230) + low_count) / values.shape[0] * 100
    print('histing...')
    plt.figure()
    plt.hist(x=values)
    plt.title('{}, {:.1f} % are margin values'.format(title_prefix, margin_values_percent))
    plt.savefig(out_name)
    plt.show()


# Collect every pixel value of the first 'gconet_*' run's CoCA/Accordion maps.
root_dir = os.path.join([rd for rd in os.listdir('.') if 'gconet_' in rd][0], 'CoCA/Accordion')
image_paths = [os.path.join(root_dir, p) for p in os.listdir(root_dir)]
pixel_values = []
for image_path in image_paths:
    image = cv2.imread(image_path)
    pixel_values += image.flatten().squeeze().tolist()

pixel_values = np.array(pixel_values)

# Fix: the original triplicated this plotting logic; it is factored into one
# helper. Filenames and titles are preserved.
# All pixels (uint8 values are always >= 0), counting 0 as a margin value.
_plot_margin_hist(pixel_values[pixel_values >= 0], True,
                  '(0+>230)/all', 'hist_(0+>230)|all.png')
# All pixels, counting only values > 230 as margin values.
_plot_margin_hist(pixel_values[pixel_values >= 0], False,
                  '(230)/all', 'hist_(230)|all.png')
# Non-zero pixels only.
_plot_margin_hist(pixel_values[pixel_values > 0], True,
                  '(0+>230)/(all-0)', 'hist_(0+>230)|(all-0).png')
loss.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from torch import nn
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import math
5
+ import numpy as np
6
+ from torch.autograd import Variable
7
+
8
+
9
class IoU_loss(torch.nn.Module):
    """Soft IoU loss: mean over the batch of (1 - IoU(pred_i, target_i))."""

    def __init__(self):
        super(IoU_loss, self).__init__()

    def forward(self, pred, target):
        batch = pred.shape[0]
        total = 0.0
        for i in range(batch):
            # Soft intersection / union of the foreground for sample i.
            intersection = torch.sum(target[i] * pred[i])
            union = torch.sum(target[i]) + torch.sum(pred[i]) - intersection
            # eps keeps the ratio finite when both maps are empty.
            total = total + (1 - intersection / (union + 1e-5))
        return total / batch
26
+
27
+
28
class Scale_IoU(nn.Module):
    """Symmetric multi-scale IoU loss.

    For every prediction scale, adds the IoU loss of the sigmoid foreground
    against gt and of the complementary background against 1 - gt.
    """

    def __init__(self):
        super(Scale_IoU, self).__init__()
        self.iou = IoU_loss()

    def forward(self, scaled_preds, gt):
        total = 0
        for level in scaled_preds:
            prob = torch.sigmoid(level)
            # foreground IoU + background IoU so both regions are supervised
            total += self.iou(prob, gt) + self.iou(1 - prob, 1 - gt)
        return total
38
+
39
+
40
def compute_cos_dis(x_sup, x_que):
    """Cosine-similarity map between support and query feature maps.

    Both inputs are (B, C, H, W); the result is (B, HW_que, HW_sup),
    holding the cosine similarity of every query location with every
    support location.
    """
    B, C = x_sup.size(0), x_sup.size(1)
    sup = x_sup.view(B, C, -1)                                             # B, C, HWs
    que = x_que.view(B, x_que.size(1), -1)                                 # B, C, HWq
    que_norm = torch.norm(que, p=2, dim=1, keepdim=True).permute(0, 2, 1)  # B, HWq, 1
    sup_norm = torch.norm(sup, p=2, dim=1, keepdim=True)                   # B, 1, HWs
    norm_prod = torch.matmul(que_norm, sup_norm)                           # B, HWq, HWs
    dots = torch.matmul(que.permute(0, 2, 1), sup)                         # B, HWq, HWs
    # eps avoids division by zero for all-zero feature vectors
    return dots / (norm_prod + 1e-5)
55
+
56
+
main.cpython-37.pyc ADDED
Binary file (10.5 kB). View file
 
main.cpython-38.pyc ADDED
Binary file (10.3 kB). View file
 
main.py ADDED
@@ -0,0 +1,315 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ import torch.nn.functional as F
4
+ from models.vgg import VGG_Backbone
5
+ from util import *
6
+
7
+
8
def weights_init(module):
    """Kaiming-initialise conv/linear weights; unit-initialise norm layers.

    Biases, where present, are zeroed in every case.
    """
    if isinstance(module, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')
        if module.bias is not None:
            nn.init.zeros_(module.bias)
    elif isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
        nn.init.ones_(module.weight)
        if module.bias is not None:
            nn.init.zeros_(module.bias)
21
+
22
+
23
class EnLayer(nn.Module):
    """Feature-refinement block: two 3x3 convs with a ReLU in between."""

    def __init__(self, in_channel=64):
        super(EnLayer, self).__init__()
        self.enlayer = nn.Sequential(
            nn.Conv2d(in_channel, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
        )

    def forward(self, x):
        return self.enlayer(x)
35
+
36
+
37
class LatLayer(nn.Module):
    """Lateral connection: projects a backbone level to 64 channels via two 3x3 convs."""

    def __init__(self, in_channel):
        super(LatLayer, self).__init__()
        self.convlayer = nn.Sequential(
            nn.Conv2d(in_channel, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
        )

    def forward(self, x):
        return self.convlayer(x)
49
+
50
+
51
class DSLayer(nn.Module):
    """Deep-supervision head: two conv+ReLU stages, then a 1x1 conv to one logit map."""

    def __init__(self, in_channel=64):
        super(DSLayer, self).__init__()
        self.enlayer = nn.Sequential(
            nn.Conv2d(in_channel, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )
        # Raw logits: the sigmoid is applied by the loss, not here.
        self.predlayer = nn.Sequential(
            nn.Conv2d(64, 1, kernel_size=1))

    def forward(self, x):
        return self.predlayer(self.enlayer(x))
67
+
68
+
69
class half_DSLayer(nn.Module):
    """Prediction head that bottlenecks channels to 1/4 before a 1x1 logit conv."""

    def __init__(self, in_channel=512):
        super(half_DSLayer, self).__init__()
        mid = int(in_channel / 4)
        self.enlayer = nn.Sequential(
            nn.Conv2d(in_channel, mid, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )
        # Raw logits: no sigmoid here.
        self.predlayer = nn.Sequential(
            nn.Conv2d(mid, 1, kernel_size=1))

    def forward(self, x):
        return self.predlayer(self.enlayer(x))
83
+
84
+
85
class AugAttentionModule(nn.Module):
    """Self-attention block with rank-based positive re-weighting.

    The raw attention logits are ranked per row; entries with positive
    logits get a cubic rank weight so strongly-correlated positions
    dominate, while negative-logit entries keep their plain softmax weight.
    The output is residual: attention(x) + x.
    """

    def __init__(self, input_channels=512):
        super(AugAttentionModule, self).__init__()
        self.query_transform = nn.Sequential(
            nn.Conv2d(input_channels, input_channels, kernel_size=1, stride=1, padding=0),
            nn.Conv2d(input_channels, input_channels, kernel_size=1, stride=1, padding=0),
        )
        self.key_transform = nn.Sequential(
            nn.Conv2d(input_channels, input_channels, kernel_size=1, stride=1, padding=0),
            nn.Conv2d(input_channels, input_channels, kernel_size=1, stride=1, padding=0),
        )
        self.value_transform = nn.Sequential(
            nn.Conv2d(input_channels, input_channels, kernel_size=1, stride=1, padding=0),
            nn.Conv2d(input_channels, input_channels, kernel_size=1, stride=1, padding=0),
        )
        self.scale = 1.0 / (input_channels ** 0.5)
        self.conv = nn.Sequential(
            nn.Conv2d(input_channels, input_channels, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        B, C, H, W = x.size()
        x = self.conv(x)
        x_query = self.query_transform(x).view(B, C, -1).permute(0, 2, 1)  # B, HW, C
        x_key = self.key_transform(x).view(B, C, -1)                       # B, C, HW
        x_value = self.value_transform(x).view(B, C, -1).permute(0, 2, 1)  # B, HW, C
        attention_bmm = torch.bmm(x_query, x_key) * self.scale             # B, HW, HW
        attention = F.softmax(attention_bmm, dim=-1)
        # Double argsort yields, per row, the descending rank of each entry.
        attention_sort = torch.sort(attention_bmm, dim=-1, descending=True)[1]
        attention_sort = torch.sort(attention_sort, dim=-1)[1]
        # Fix: build the mask with ones_like (inherits x's device) instead of
        # the original hard-coded .cuda(), which broke CPU execution.
        attention_positive_num = torch.ones_like(attention)
        attention_positive_num[attention_bmm < 0] = 0
        att_pos_mask = attention_positive_num.clone()
        attention_positive_num = torch.sum(attention_positive_num, dim=-1, keepdim=True).expand_as(attention_sort)
        attention_sort_pos = attention_sort.float().clone()
        apn = attention_positive_num - 1
        attention_sort_pos[attention_sort > apn] = 0
        # Cubic rank weight for positive entries, identity for negatives.
        attention_mask = ((attention_sort_pos + 1) ** 3) * att_pos_mask + (1 - att_pos_mask)
        out = torch.bmm(attention * attention_mask, x_value)
        out = out.view(B, H, W, C).permute(0, 3, 1, 2)
        return out + x
130
+
131
+
132
class AttLayer(nn.Module):
    """Democratic fusion layer (DCFM).

    For each image, finds the location that correlates most with the whole
    group (max over each other image, mean over the group), uses the
    normalised features there as a seed, builds a correlation map against
    the seeds, and re-weights the features with the map and the group
    prototype.
    """

    def __init__(self, input_channels=512):
        super(AttLayer, self).__init__()
        self.query_transform = nn.Conv2d(input_channels, input_channels, kernel_size=1, stride=1, padding=0)
        self.key_transform = nn.Conv2d(input_channels, input_channels, kernel_size=1, stride=1, padding=0)
        self.scale = 1.0 / (input_channels ** 0.5)
        self.conv = nn.Conv2d(input_channels, input_channels, kernel_size=1, stride=1, padding=0)

    def correlation(self, x5, seeds):
        """Min-max-normalised correlation map of every location with the seeds."""
        B, C, H5, W5 = x5.size()
        if self.training:
            correlation_maps = F.conv2d(x5, weight=seeds)  # B,B,H,W
        else:
            # At inference, clamp negative correlations to zero.
            correlation_maps = torch.relu(F.conv2d(x5, weight=seeds))  # B,B,H,W
        correlation_maps = correlation_maps.mean(1).view(B, -1)
        min_value = torch.min(correlation_maps, dim=1, keepdim=True)[0]
        max_value = torch.max(correlation_maps, dim=1, keepdim=True)[0]
        correlation_maps = (correlation_maps - min_value) / (max_value - min_value + 1e-12)  # [B, HW]
        correlation_maps = correlation_maps.view(B, 1, H5, W5)  # [B, 1, H, W]
        return correlation_maps

    def forward(self, x5):
        # x5: B,C,H,W
        x5 = self.conv(x5) + x5
        B, C, H5, W5 = x5.size()
        x_query = self.query_transform(x5).view(B, C, -1)
        x_query = torch.transpose(x_query, 1, 2).contiguous().view(-1, C)  # BHW, C
        x_key = self.key_transform(x5).view(B, C, -1)
        x_key = torch.transpose(x_key, 0, 1).contiguous().view(C, -1)  # C, BHW
        # W = Q^T K: similarity of every location with every location of
        # every image in the group.
        x_w1 = torch.matmul(x_query, x_key) * self.scale  # BHW, BHW
        x_w = x_w1.view(B * H5 * W5, B, H5 * W5)
        x_w = torch.max(x_w, -1).values  # BHW, B: best match inside each image
        x_w = x_w.mean(-1)               # averaged over the group
        x_w = x_w.view(B, -1)            # B, HW
        x_w = F.softmax(x_w, dim=-1)     # B, HW
        norm0 = F.normalize(x5, dim=1)
        # One-hot mask of the most group-consistent location per image.
        # Fix: zeros_like inherits x_w's device; the original hard-coded
        # .cuda() broke CPU execution.
        x_w = x_w.unsqueeze(1)
        x_w_max = torch.max(x_w, -1).values.unsqueeze(2).expand_as(x_w)
        mask = torch.zeros_like(x_w)
        mask[x_w == x_w_max] = 1
        mask = mask.view(B, 1, H5, W5)
        seeds = norm0 * mask
        seeds = seeds.sum(3).sum(2).unsqueeze(2).unsqueeze(3)
        cormap = self.correlation(norm0, seeds)
        x51 = x5 * cormap
        proto1 = torch.mean(x51, (0, 2, 3), True)  # group prototype, (1, C, 1, 1)
        return x5, proto1, x5 * proto1 + x51, mask
188
+
189
+
190
class Decoder(nn.Module):
    """FPN-style decoder.

    Fuses the fused top-level feature with four lateral backbone levels
    top-down; every level emits a full-resolution (H, W) prediction map,
    returned deepest-first. The original four copy-pasted stages are
    factored into one per-level helper driven by a loop.
    """

    def __init__(self):
        super(Decoder, self).__init__()
        self.toplayer = nn.Sequential(
            nn.Conv2d(512, 64, kernel_size=1, stride=1, padding=0),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=1, stride=1, padding=0))
        self.latlayer4 = LatLayer(in_channel=512)
        self.latlayer3 = LatLayer(in_channel=256)
        self.latlayer2 = LatLayer(in_channel=128)
        self.latlayer1 = LatLayer(in_channel=64)

        self.enlayer4 = EnLayer()
        self.enlayer3 = EnLayer()
        self.enlayer2 = EnLayer()
        self.enlayer1 = EnLayer()

        self.dslayer4 = DSLayer()
        self.dslayer3 = DSLayer()
        self.dslayer2 = DSLayer()
        self.dslayer1 = DSLayer()

    def _upsample_add(self, x, y):
        """Bilinearly upsample *x* to *y*'s spatial size and add them."""
        [_, _, H, W] = y.size()
        x = F.interpolate(x, size=(H, W), mode='bilinear', align_corners=False)
        return x + y

    def _decode_level(self, p_above, lateral, enlayer, dslayer, H, W):
        """One top-down step: fuse with the lateral feature, refine, and
        emit a (H, W) prediction map."""
        p = enlayer(self._upsample_add(p_above, lateral))
        pred = F.interpolate(dslayer(p), size=(H, W), mode='bilinear', align_corners=False)
        return p, pred

    def forward(self, weighted_x5, x4, x3, x2, x1, H, W):
        preds = []
        p = self.toplayer(weighted_x5)
        stages = (
            (x4, self.latlayer4, self.enlayer4, self.dslayer4),
            (x3, self.latlayer3, self.enlayer3, self.dslayer3),
            (x2, self.latlayer2, self.enlayer2, self.dslayer2),
            (x1, self.latlayer1, self.enlayer1, self.dslayer1),
        )
        for feat, latlayer, enlayer, dslayer in stages:
            p, pred = self._decode_level(p, latlayer(feat), enlayer, dslayer, H, W)
            preds.append(pred)
        return preds
252
+
253
+
254
class DCFMNet(nn.Module):
    """DCFM network: VGG features, democratic fusion, attention augmentation
    and an FPN decoder.

    In training mode the forward pass additionally returns the prototypes of
    the full / GT-foreground / GT-background fused features.
    """

    def __init__(self, mode='train'):
        super(DCFMNet, self).__init__()
        self.gradients = None
        self.backbone = VGG_Backbone()
        self.mode = mode
        self.aug = AugAttentionModule()
        self.fusion = AttLayer(512)
        self.decoder = Decoder()

    def set_mode(self, mode):
        """Switch between 'train' and evaluation behaviour."""
        self.mode = mode

    def forward(self, x, gt):
        """Run the network; gradients are disabled outside 'train' mode."""
        if self.mode == 'train':
            return self._forward(x, gt)
        with torch.no_grad():
            return self._forward(x, gt)

    def featextract(self, x):
        """Return the five VGG stages, deepest first."""
        x1 = self.backbone.conv1(x)
        x2 = self.backbone.conv2(x1)
        x3 = self.backbone.conv3(x2)
        x4 = self.backbone.conv4(x3)
        x5 = self.backbone.conv5(x4)
        return x5, x4, x3, x2, x1

    def _forward(self, x, gt):
        B, _, H, W = x.size()
        x5, x4, x3, x2, x1 = self.featextract(x)
        feat, proto, weighted_x5, cormap = self.fusion(x5)
        preds = self.decoder(self.aug(weighted_x5), x4, x3, x2, x1, H, W)
        if not self.training:
            return preds
        # Prototypes of the GT-masked foreground / background features are
        # returned alongside the full prototype during training.
        gt = F.interpolate(gt, size=weighted_x5.size()[2:], mode='bilinear', align_corners=False)
        _, proto_pos, _, _ = self.fusion(x5 * gt)
        _, proto_neg, _, _ = self.fusion(x5 * (1 - gt))
        return preds, proto, proto_pos, proto_neg
298
+
299
+
300
class DCFM(nn.Module):
    """Thin wrapper around DCFMNet that seeds the RNGs and forwards the mode."""

    def __init__(self, mode='train'):
        super(DCFM, self).__init__()
        set_seed(123)  # reproducible weight initialisation
        self.dcfmnet = DCFMNet()
        self.mode = mode

    def set_mode(self, mode):
        """Propagate the train/test mode to the wrapped network."""
        self.mode = mode
        self.dcfmnet.set_mode(self.mode)

    def forward(self, x, gt):
        # Co-SOD forward pass, delegated entirely to DCFMNet.
        return self.dcfmnet(x, gt)
315
+
preprocessing.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import glob
2
+ import os
3
+ import cv2
4
+ import numpy as np
5
+ from PIL import Image
6
+ import torch
7
+ from PIL import Image
8
+ from pathlib import Path
9
+ import torchvision.transforms as T
10
+ import torchvision.transforms.functional as TF
11
+ import numpy as np
12
+ from torchvision import transforms
13
+ import os
14
+ import cv2
15
+ import matplotlib.pyplot as plt
16
+ import shutil
17
+
18
# --- Dataset layout configuration -------------------------------------------
# source: raw NWRD training split; dest: root of all processed outputs.
source = "/scratch/wej36how/Datasets/NWRD/train"
dest = "/scratch/wej36how/Datasets/NWRDProcessed/train"
patch_size = 224            # square patch edge, in pixels
rust_threshold = 150        # NOTE(review): unused in this visible chunk — presumably a mask pixel threshold; confirm downstream
max_number_of_images_per_group = 12

patches_path = os.path.join(dest, "patches")
images_dir = os.path.join(patches_path, "images")
masks_dir = os.path.join(patches_path, "masks")

destination = os.path.join(dest, "RustNonRustSplit")
root = patches_path

rust_images_dir = os.path.join(destination,"rust","images")
non_rust_images_dir = os.path.join(destination,"non_rust","images")

# Classification split: copy rust / non-rust patch images into per-class
# folders. ("calssification" is a pre-existing typo in the path, kept because
# other scripts may already reference it.)
rustClassificationDir = os.path.join(dest, "calssification", "rust")
nonRustClassificationDir = os.path.join(dest, "calssification", "non_rust")
os.makedirs(rustClassificationDir, exist_ok=True)
os.makedirs(nonRustClassificationDir, exist_ok=True)

shutil.copytree(rust_images_dir,rustClassificationDir, dirs_exist_ok=True)
shutil.copytree(non_rust_images_dir,nonRustClassificationDir, dirs_exist_ok=True)
41
+
42
+ import os
43
+ import glob
44
+
45
def delete_extra_images(directory, target_count):
    """Trim *directory* down to *target_count* images, oldest first.

    Only files matching *.JPG, *.jpeg or *.png are considered (pattern
    matching is case-sensitive). Prints how many files were removed, or a
    warning when the directory already holds fewer images than the target.
    """
    patterns = ('*.JPG', '*.jpeg', '*.png')
    image_files = []
    for pattern in patterns:
        image_files += glob.glob(os.path.join(directory, pattern))

    surplus = len(image_files) - target_count
    if surplus > 0:
        # Oldest files (by modification time) are deleted first.
        image_files.sort(key=os.path.getmtime)
        for path in image_files[:surplus]:
            os.remove(path)
        print(f"{surplus} images deleted.")
    elif surplus < 0:
        print("Warning: Number of images in directory is less than the target count.")
62
# Balance the classification split: trim the larger class folder down to the
# size of the smaller one, so both classes contribute equally.
if len(os.listdir(rustClassificationDir))< len(os.listdir(nonRustClassificationDir)):
    delete_extra_images(nonRustClassificationDir, len(os.listdir(rustClassificationDir)))
else:
    delete_extra_images(rustClassificationDir, len(os.listdir(nonRustClassificationDir)))

# Build the co-saliency split from a copy of the rust data (images + masks).
rust_dir = os.path.join(destination,"rust")
rustCosaliencynDir = os.path.join(dest, "cosaliency")
shutil.copytree(rust_dir,rustCosaliencynDir, dirs_exist_ok=True)
70
+
71
def split_images_into_folders(source_dir, destination_dir):
    """Group loose .png patches into per-image subfolders of *destination_dir*.

    The target folder name is the first '_'-separated token of the file
    name, or the second token when the first is not purely numeric. Files
    are moved, not copied.
    """
    if not os.path.exists(destination_dir):
        os.makedirs(destination_dir)
    for filename in os.listdir(source_dir):
        if not filename.endswith('.png'):
            continue
        tokens = filename.split('_')
        # Prefer the leading token; fall back to the second when it is not a number.
        image_no = tokens[0] if tokens[0].isdigit() else tokens[1]
        target_dir = os.path.join(destination_dir, image_no)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.move(os.path.join(source_dir, filename), target_dir)
88
+
89
def organize_images(main_directory):
    # Split each subfolder of *main_directory* into groups of 12 images,
    # moving the surplus into sibling "<name>_partN" folders.
    #
    # NOTE(review): images_per_subdir hard-codes 12 instead of reusing
    # max_number_of_images_per_group defined above — confirm they are meant
    # to stay in sync. The last full group plus any remainder
    # (len(images) % 12) stays in the original subfolder.

    # Ensure the main directory exists
    if not os.path.exists(main_directory):
        print(f"The specified main directory '{main_directory}' does not exist.")
        return

    # Get a list of subdirectories in the main directory
    subdirectories = [d for d in os.listdir(main_directory) if os.path.isdir(os.path.join(main_directory, d))]

    # Process each subdirectory
    for subdir in subdirectories:
        subdir_path = os.path.join(main_directory, subdir)

        # Get a list of images in the subdirectory
        images = [f for f in os.listdir(subdir_path) if f.lower().endswith(('.jpg', '.jpeg', '.png', '.gif'))]
        # Determine the number of images per subdirectory
        images_per_subdir = 12
        num_subdirectories = len(images) // images_per_subdir
        n = 0  # running index into *images*, shared across all part folders
        # Create additional subdirectories if needed
        for i in range(num_subdirectories - 1):
            new_subdir_name = f"{subdir}_part{i + 1}"
            new_subdir_path = os.path.join(main_directory, new_subdir_name)

            # Create the new subdirectory
            os.makedirs(new_subdir_path)

            # Move images to the new subdirectory
            for j in range(images_per_subdir):
                old_image_path = os.path.join(subdir_path, images[n])
                new_image_path = os.path.join(new_subdir_path, images[n])
                shutil.move(old_image_path, new_image_path)
                n += 1
122
+
123
# Regroup the co-saliency images. Source and destination are intentionally
# the same directory: loose patch files are first moved into subfolders named
# after their image number, then oversized folders are split into
# "<no>_partN" groups of 12.
source_directory = os.path.join(dest, "cosaliency", "images")
destination_directory = os.path.join(dest, "cosaliency", "images")
split_images_into_folders(source_directory, destination_directory)
organize_images(destination_directory)

# Apply the identical regrouping to the masks so image/mask folders stay aligned.
source_directory = os.path.join(dest, "cosaliency", "masks")
destination_directory = os.path.join(dest, "cosaliency", "masks")
split_images_into_folders(source_directory, destination_directory)
organize_images(destination_directory)
requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ matplotlib==3.4.1
2
+ numpy==1.19.2
3
+ opencv_python==4.5.1.48
4
+ pandas==1.2.4
5
+ Pillow==9.1.0
6
+ pytorch_toolbelt==0.4.3
7
+ scikit_image==0.18.1
8
# skimage==0.0 removed: the PyPI "skimage" package is an empty 0.0 placeholder that fails to install; scikit_image above already provides the `skimage` module
9
+ torch==1.7.1
10
+ torchvision==0.2.2
11
+ tqdm==4.60.0
12
+ transformers
segmentation_metrics.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import cv2
import numpy as np
from sklearn.metrics import precision_score, recall_score, f1_score
from sklearn.metrics import jaccard_score

# Compare predicted saliency maps against ground-truth masks file-by-file and
# report average precision, recall, F1 and IoU over all matching filenames.

# Directories containing the prediction maps and ground truth masks
dir1 = '/home/wej36how/codes/CoSOD-main/result/Predictions/NWRDFRust_concatenated'
dir2 = '/home/wej36how/datasets/NWRDF/test/masks'

# Per-image scores, averaged at the end.
precisions = []
recalls = []
f1_scores = []
iou_scores = []

# Loop through all files in the prediction directory; only filenames present
# in BOTH directories are scored, everything else is silently skipped.
for filename in os.listdir(dir1):
    pred_path = os.path.join(dir1, filename)
    gt_path = os.path.join(dir2, filename)
    print(pred_path)
    if os.path.exists(pred_path) and os.path.exists(gt_path):
        # Load both maps as single-channel grayscale.
        pred_img = cv2.imread(pred_path, cv2.IMREAD_GRAYSCALE)
        gt_img = cv2.imread(gt_path, cv2.IMREAD_GRAYSCALE)

        # cv2.imread returns None (instead of raising) for unreadable or
        # non-image files; skip such pairs rather than crashing on .flatten().
        if pred_img is None or gt_img is None:
            print('Skipping unreadable pair: {}'.format(filename))
            continue

        # Flatten to 1-D so sklearn treats every pixel as a sample.
        pred_flat = pred_img.flatten()
        gt_flat = gt_img.flatten()

        # Binarize at the grayscale midpoint (assuming binary segmentation
        # masks stored as 0/255).
        pred_flat = (pred_flat > 127).astype(np.uint8)
        gt_flat = (gt_flat > 127).astype(np.uint8)

        # zero_division=0 yields the same 0.0 score sklearn already returned
        # for all-negative images, but without the UndefinedMetricWarning spam.
        precision = precision_score(gt_flat, pred_flat, zero_division=0)
        recall = recall_score(gt_flat, pred_flat, zero_division=0)
        f1 = f1_score(gt_flat, pred_flat, zero_division=0)
        iou = jaccard_score(gt_flat, pred_flat, zero_division=0)

        precisions.append(precision)
        recalls.append(recall)
        f1_scores.append(f1)
        iou_scores.append(iou)


# Average over all scored images (unweighted by image size).
avg_precision = np.mean(precisions)
avg_recall = np.mean(recalls)
avg_f1_score = np.mean(f1_scores)
avg_iou = np.mean(iou_scores)

# Print the results
print(f'Average Precision: {avg_precision:.4f}')
print(f'Average Recall: {avg_recall:.4f}')
print(f'Average F1 Score: {avg_f1_score:.4f}')
print(f'Average iou Score: {avg_iou:.4f}')
select_results.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import argparse
3
+ import numpy as np
4
+ import cv2
5
+
6
+ from evaluator import Eval_thread
7
+ from dataloader import EvalDataset
8
+
9
+ import sys
10
+ sys.path.append('..')
11
+
12
+
13
def main(cfg):
    """Select high-quality predictions by S-measure and save comparison strips.

    For each dataset in ``cfg.datasets`` (``+``-separated), predictions whose
    S-measure clears a threshold (and whose competitor score is low) are copied
    into ``good_ones/<dataset>/<category>/``, alongside a side-by-side strip of
    image | GT | our prediction | competitor prediction.
    """
    dataset_names = cfg.datasets.split('+')
    # Candidate prediction roots are directories in the CWD named 'gconet_*'.
    # NOTE(review): only the first match is used; raises IndexError if none
    # exist — presumably the caller guarantees at least one.
    root_dir_predictions = [dr for dr in os.listdir('.') if 'gconet_' in dr]
    # Competitor predictions are assumed to live next to the GT root, under
    # '/gconet' instead of '/gts'.
    root_dir_prediction_comp = cfg.gt_dir.replace('/gts', '/gconet')
    print('root_dir_predictions:', root_dir_predictions)
    root_dir_prediction = root_dir_predictions[0]
    root_dir_good_ones = 'good_ones'
    for dataset in dataset_names:
        dir_prediction = os.path.join(root_dir_prediction, dataset)
        dir_prediction_comp = os.path.join(root_dir_prediction_comp, dataset)
        dir_gt = os.path.join(cfg.gt_dir, dataset)
        # Loader over (our prediction, GT) pairs; paths are returned so the
        # selected files can be copied below.
        loader = EvalDataset(
            dir_prediction, # preds
            dir_gt, # GT
            return_predpath=True,
            return_gtpath=True
        )
        # Same GT paired with the competitor's predictions.
        loader_comp = EvalDataset(
            dir_prediction_comp, # preds
            dir_gt, # GT
            return_predpath=True
        )
        print('Selecting predictions from {}'.format(dir_prediction))
        thread = Eval_thread(loader, cuda=cfg.cuda)
        # Keep samples where our S-measure > 0.95 while the competitor's is
        # below 0.2 (i.e. we succeed where the competitor fails).
        s_measure, good_ones, good_ones_comp, good_ones_gt = thread.select_by_Smeasure(bar=0.95, loader_comp=loader_comp, bar_comp=0.2)
        dir_good_ones = os.path.join(root_dir_good_ones, dataset)
        os.makedirs(dir_good_ones, exist_ok=True)
        print('have good_ones {}'.format(len(good_ones)))
        for good_one, good_one_comp, good_one_gt in zip(good_ones, good_ones_comp, good_ones_gt):
            # Paths look like .../<category>/<file>.png — mirror the category
            # folder under good_ones/.
            dir_category = os.path.join(dir_good_ones, good_one.split('/')[-2])
            os.makedirs(dir_category, exist_ok=True)
            save_path = os.path.join(dir_category, good_one.split('/')[-1])
            sal_map = cv2.imread(good_one)
            sal_map_gt = cv2.imread(good_one_gt)
            sal_map_comp = cv2.imread(good_one_comp)
            # The source image lives under '/images' with a .jpg extension.
            image_path = good_one_gt.replace('/gts', '/images').replace('.png', '.jpg')
            image = cv2.imread(image_path)
            cv2.imwrite(save_path, sal_map)
            # 10-px gray separator between panels of the comparison strip.
            split_line = np.zeros((sal_map.shape[0], 10, 3)).astype(sal_map.dtype) + 127
            # Horizontal strip: image | GT | ours | competitor.
            comp = cv2.hconcat([image, split_line, sal_map_gt, split_line, sal_map, split_line, sal_map_comp])
            # '<name>_comp.<ext>' next to the plain saved prediction.
            save_path_comp = ''.join((save_path[:-4], '_comp', save_path[-4:]))
            cv2.imwrite(save_path_comp, comp)
55
+
56
+
57
if __name__ == "__main__":
    # Command-line entry point for the prediction-selection script.
    parser = argparse.ArgumentParser()
    parser.add_argument('--datasets', type=str, default='CoCA+CoSOD3k+CoSal2015')
    parser.add_argument('--gt_dir', type=str, default='/root/datasets/sod/gts', help='GT')
    # BUGFIX: `type=bool` is an argparse trap — bool("False") is True, so any
    # explicit value (including "--cuda False") enabled CUDA. Parse the string
    # ourselves; the default (True) and accepted syntax are unchanged, but
    # "False"/"0"/"no" now actually disable CUDA.
    parser.add_argument('--cuda',
                        type=lambda s: s.lower() not in ('false', '0', 'no'),
                        default=True)
    config = parser.parse_args()
    main(config)
+ main(config)
sort_results.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import shutil
import matplotlib.pyplot as plt
import numpy as np


# When True, the best checkpoint's prediction folder is moved from the shared
# predictions root into the current directory at the end of the script.
move_best_results_here = False

# Column layout of each parsed result row; `measurement` selects the metric
# that drives the "best checkpoint" ranking below.
record = ['dataset', 'ckpt', 'Emax', 'Smeasure', 'Fmax', 'MAE', 'Emean', 'Fmean']
measurement = 'Emax'
score_idx = record.index(measurement)

# Raw evaluation log written by the evaluation step.
with open('output/details/result.txt', 'r') as f:
    res = f.read()

# Strip decorative separators so each line splits cleanly on whitespace.
res = res.replace('||', '').replace('(', '').replace(')', '')

score = []
for r in res.splitlines():
    ds = r.split()
    s = ds[:2]  # dataset name and checkpoint identifier
    # After the two name fields, tokens alternate value / label text; keep
    # every other token (the numeric values).
    for idx_d, d in enumerate(ds[2:]):
        if idx_d % 2 == 0:
            s.append(float(d))
    score.append(s)

# Sort descending by dataset name, then Emax, Smeasure, Fmax, ckpt — so the
# first row per dataset is that dataset's best-scoring checkpoint.
ss = sorted(score, key=lambda x: (x[record.index('dataset')], x[record.index('Emax')], x[record.index('Smeasure')], x[record.index('Fmax')], x[record.index('ckpt')]), reverse=True)
ss_ar = np.array(ss)
np.savetxt('score_sorted.txt', ss_ar, fmt='%s')
# Best checkpoint id per dataset (first row after the descending sort).
ckpt_coca = ss_ar[ss_ar[:, 0] == 'CoCA'][0][1]
ckpt_cosod = ss_ar[ss_ar[:, 0] == 'CoSOD3k'][0][1]
ckpt_cosal = ss_ar[ss_ar[:, 0] == 'CoSal2015'][0][1]

# All rows (i.e. all three datasets) for each per-dataset best checkpoint.
best_coca_scores = ss_ar[ss_ar[:, 1] == ckpt_coca]
best_cosod_scores = ss_ar[ss_ar[:, 1] == ckpt_cosod]
best_cosal_scores = ss_ar[ss_ar[:, 1] == ckpt_cosal]
print('Best (models may be different):')
print('CoCA:\n', best_coca_scores)
print('CoSOD3k:\n', best_cosod_scores)
print('CoSal2015:\n', best_cosal_scores)

# Overal relative Emax improvement on three datasets
# Baseline (GCoNet) scores to measure relative improvement against, keyed by
# dataset. Only the Emax branch also carries the Smeasure baselines; the
# other branches would leave `gco_scores_Smeasure` undefined (NameError in
# the loop below) — presumably only measurement == 'Emax' is ever used.
if measurement == 'Emax':
    gco_scores = {'CoCA': 0.760, 'CoSOD3k': 0.860, 'CoSal2015': 0.887}
    gco_scores_Smeasure = {'CoCA': 0.673, 'CoSOD3k': 0.802, 'CoSal2015': 0.845}
elif measurement == 'Smeasure':
    gco_scores = {'CoCA': 0.673, 'CoSOD3k': 0.802, 'CoSal2015': 0.845}
elif measurement == 'Fmax':
    gco_scores = {'CoCA': 0.544, 'CoSOD3k': 0.777, 'CoSal2015': 0.847}
elif measurement == 'Emean':
    gco_scores = {'CoCA': 0.1, 'CoSOD3k': 0.1, 'CoSal2015': 0.1}
elif measurement == 'Fmean':
    gco_scores = {'CoCA': 0.1, 'CoSOD3k': 0.1, 'CoSal2015': 0.1}
# Unique checkpoint ids present in the log.
ckpts = list(set(ss_ar[:, 1].squeeze().tolist()))
improvements_mean = []
improvements_lst = []
improvements_mean_Smeasure = []
improvements_lst_Smeasure = []
for ckpt in ckpts:
    scores = ss_ar[ss_ar[:, 1] == ckpt]
    # Checkpoints not evaluated on all three datasets get sentinel scores so
    # they can never win the argsort below.
    # NOTE(review): `[-1, -1, 1]` looks like a typo for `[-1, -1, -1]` —
    # confirm; it only affects the printed per-dataset numbers for a
    # sentinel entry, not the ranking.
    if scores.shape[0] != len(gco_scores):
        improvements_mean.append(-1)
        improvements_lst.append([-1, -1, 1])
        improvements_mean_Smeasure.append(-1)
        improvements_lst_Smeasure.append([-1, -1, 1])
        continue
    score_coca = float(scores[scores[:, 0] == 'CoCA'][0][score_idx])
    score_cosod = float(scores[scores[:, 0] == 'CoSOD3k'][0][score_idx])
    score_cosal = float(scores[scores[:, 0] == 'CoSal2015'][0][score_idx])
    # Relative (not absolute) improvement over the baseline, per dataset.
    improvements = [
        (score_coca - gco_scores['CoCA']) / gco_scores['CoCA'],
        (score_cosod - gco_scores['CoSOD3k']) / gco_scores['CoSOD3k'],
        (score_cosal - gco_scores['CoSal2015']) / gco_scores['CoSal2015']
    ]
    improvement_mean = np.mean(improvements)
    improvements_mean.append(improvement_mean)
    improvements_lst.append(improvements)

    # Smeasure
    score_coca = float(scores[scores[:, 0] == 'CoCA'][0][record.index('Smeasure')])
    score_cosod = float(scores[scores[:, 0] == 'CoSOD3k'][0][record.index('Smeasure')])
    score_cosal = float(scores[scores[:, 0] == 'CoSal2015'][0][record.index('Smeasure')])
    improvements_Smeasure = [
        (score_coca - gco_scores_Smeasure['CoCA']) / gco_scores_Smeasure['CoCA'],
        (score_cosod - gco_scores_Smeasure['CoSOD3k']) / gco_scores_Smeasure['CoSOD3k'],
        (score_cosal - gco_scores_Smeasure['CoSal2015']) / gco_scores_Smeasure['CoSal2015']
    ]
    improvement_mean_Smeasure = np.mean(improvements_Smeasure)
    improvements_mean_Smeasure.append(improvement_mean_Smeasure)
    improvements_lst_Smeasure.append(improvements_Smeasure)
# Pick the overall winner by mean relative improvement, ranked either by
# Emax or by Smeasure depending on `best_measurement`.
best_measurement = 'Emax'
if best_measurement == 'Emax':
    best_improvement_index = np.argsort(improvements_mean).tolist()[-1]
    best_ckpt = ckpts[best_improvement_index]
    best_improvement_mean = improvements_mean[best_improvement_index]
    best_improvements = improvements_lst[best_improvement_index]

    best_improvement_mean_Smeasure = improvements_mean_Smeasure[best_improvement_index]
    best_improvements_Smeasure = improvements_lst_Smeasure[best_improvement_index]
elif best_measurement == 'Smeasure':
    best_improvement_index = np.argsort(improvements_mean_Smeasure).tolist()[-1]
    best_ckpt = ckpts[best_improvement_index]
    best_improvement_mean_Smeasure = improvements_mean_Smeasure[best_improvement_index]
    best_improvements_Smeasure = improvements_lst_Smeasure[best_improvement_index]

    best_improvement_mean = improvements_mean[best_improvement_index]
    best_improvements = improvements_lst[best_improvement_index]

print('The overall best one:')
print(ss_ar[ss_ar[:, 1] == best_ckpt])
print('Got Emax improvements on CoCA-{:.3f}%, CoSOD3k-{:.3f}%, CoSal2015-{:.3f}%, mean_improvement: {:.3f}%.'.format(
    best_improvements[0]*100, best_improvements[1]*100, best_improvements[2]*100, best_improvement_mean*100
))
print('Got Smes improvements on CoCA-{:.3f}%, CoSOD3k-{:.3f}%, CoSal2015-{:.3f}%, mean_improvement: {:.3f}%.'.format(
    best_improvements_Smeasure[0]*100, best_improvements_Smeasure[1]*100, best_improvements_Smeasure[2]*100, best_improvement_mean_Smeasure*100
))
# Checkpoint ids look like 'gconet_<trial>-...ep<epoch>:...'; pull the trial
# and epoch numbers back out of the string.
trial = int(best_ckpt.split('_')[-1].split('-')[0])
ep = int(best_ckpt.split('ep')[-1].split(':')[0])
if move_best_results_here:
    trial, ep = 'gconet_{}'.format(trial), 'ep{}'.format(ep)
    dr = os.path.join(trial, ep)
    dst = '-'.join((trial, ep))
    shutil.move(os.path.join('/root/datasets/sod/preds', dr), dst)


# model_indices = sorted([fname.split('_')[-1] for fname in os.listdir('output/details') if 'gconet_' in fname])
# emax = {}
# for model_idx in model_indices:
#     m = 'gconet_{}-'.format(model_idx)
#     if m not in list(emax.keys()):
#         emax[m] = []
#     for s in score:
#         if m in s[1]:
#             ep = int(s[1].split('ep')[-1].rstrip('):'))
#             emax[m].append([ep, s[2], s[0]])

# for m, e in emax.items():
#     plot_name = m[:-1]
#     print('Saving {} ...'.format(plot_name))
#     e = np.array(e)
#     e_coca = e[e[:, -1] == 'CoCA']
#     e_cosod = e[e[:, -1] == 'CoSOD3k']
#     e_cosal = e[e[:, -1] == 'CoSal2015']
#     eps = sorted(list(set(e_coca[:, 0].astype(float))))

#     e_coca = np.array(sorted(e_coca, key=lambda x: int(x[0])))[:, 1].astype(float)
#     e_cosod = np.array(sorted(e_cosod, key=lambda x: int(x[0])))[:, 1].astype(float)
#     e_cosal = np.array(sorted(e_cosal, key=lambda x: int(x[0])))[:, 1].astype(float)

#     plt.figure()
#     plt.plot(eps, e_coca)
#     plt.plot(eps, e_cosod)
#     plt.plot(eps, e_cosal)
#     plt.legend(['CoCA', 'CoSOD3k', 'CoSal2015'])
#     plt.title(m)
#     plt.savefig('{}.png'.format(plot_name))
test.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from PIL import Image
2
+ from dataset import get_loader
3
+ import torch
4
+ from torchvision import transforms
5
+ # from util import save_tensor_img, Logger
6
+ from tqdm import tqdm
7
+ from torch import nn
8
+ import os
9
+ from models.main import *
10
+ import argparse
11
+ # import numpy as np
12
+ # import cv2
13
+ # from skimage import img_as_ubyte
14
+
15
+
16
def main(args):
    """Run DCFM inference on the configured test set(s) and save predictions.

    Loads a trained DCFM checkpoint, runs each image group through the model,
    upsamples each sigmoid prediction back to its original size and writes it
    under ``args.save_root/<testset>/<group>/``.
    """
    # BUGFIX: the module-level `from util import save_tensor_img, Logger` is
    # commented out, so `save_tensor_img` below raised NameError. Import it
    # locally here to keep this fix self-contained.
    from util import save_tensor_img

    # Init model
    device = torch.device("cuda")
    model = DCFM()
    model = model.to(device)
    try:
        # Preferred: a specific best-S-measure checkpoint.
        # modelname = os.path.join(args.param_root, 'best_ep198_Smeasure0.7019.pth')
        modelname = "/scratch/wej36how/codes/DCFM-master/best_ep12_Smeasure0.7256.pth"
        dcfmnet_dict = torch.load(modelname)
        print('loaded', modelname)
    except Exception:
        # Fall back to the default checkpoint name in the parameter folder.
        # (Narrowed from a bare `except:` so KeyboardInterrupt etc. still work.)
        dcfmnet_dict = torch.load(os.path.join(args.param_root, 'dcfm.pth'))

    model.to(device)
    model.dcfmnet.load_state_dict(dcfmnet_dict)
    model.eval()
    model.set_mode('test')

    tensor2pil = transforms.ToPILImage()
    for testset in ['NWRD']:
        # Per-testset image/GT roots and output folder.
        if testset == 'CoCA':
            test_img_path = './data/images/CoCA/'
            test_gt_path = './data/gts/CoCA/'
            saved_root = os.path.join(args.save_root, 'CoCA')
        elif testset == 'CoSOD3k':
            test_img_path = './data/images/CoSOD3k/'
            test_gt_path = './data/gts/CoSOD3k/'
            saved_root = os.path.join(args.save_root, 'CoSOD3k')
        elif testset == 'CoSal2015':
            test_img_path = './data/images/CoSal2015/'
            test_gt_path = './data/gts/CoSal2015/'
            saved_root = os.path.join(args.save_root, 'CoSal2015')
        elif testset == 'NWRD':
            test_img_path = '/home/wej36how/codes/crossvit/results/nwrd22/images/'
            test_gt_path = '/home/wej36how/codes/crossvit/results/nwrd22/images/'
            saved_root = os.path.join(args.save_root, 'NWRD')
        else:
            print('Unkonwn test dataset')
            # BUGFIX: `args` has no `dataset` attribute (the parser defines
            # --size/--param_root/--save_root only), so the original
            # `print(args.dataset)` raised AttributeError. Print the actual
            # unknown name instead.
            print(testset)

        # Batch size 1 group per step; istrain=False keeps original sizes.
        test_loader = get_loader(
            test_img_path, test_gt_path, args.size, 1, istrain=False, shuffle=False, num_workers=8, pin=True)

        for batch in tqdm(test_loader):
            inputs = batch[0].to(device).squeeze(0)
            gts = batch[1].to(device).squeeze(0)
            subpaths = batch[2]
            ori_sizes = batch[3]
            scaled_preds = model(inputs, gts)
            # Final-stage logits -> probabilities.
            scaled_preds = torch.sigmoid(scaled_preds[-1])
            os.makedirs(os.path.join(saved_root, subpaths[0][0].split('/')[0]), exist_ok=True)
            num = gts.shape[0]
            for inum in range(num):
                subpath = subpaths[inum][0]
                ori_size = (ori_sizes[inum][0].item(), ori_sizes[inum][1].item())
                # Resize each prediction back to the source image resolution.
                res = nn.functional.interpolate(scaled_preds[inum].unsqueeze(0), size=ori_size, mode='bilinear', align_corners=True)
                save_tensor_img(res, os.path.join(saved_root, subpath))
74
+
75
+
76
if __name__ == '__main__':
    # Command-line interface for the inference script.
    cli = argparse.ArgumentParser(description='')
    cli.add_argument('--size', default=224, type=int, help='input size')
    cli.add_argument('--param_root', default='/data1/dcfm/temp', type=str, help='model folder')
    cli.add_argument('--save_root', default='./CoSODmaps/pred', type=str, help='Output folder')

    cli_args = cli.parse_args()
    main(cli_args)
89
+
90
+
91
+
train.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ from util import Logger, AverageMeter, save_checkpoint, save_tensor_img, set_seed
5
+ import os
6
+ import numpy as np
7
+ from matplotlib import pyplot as plt
8
+ import time
9
+ import argparse
10
+ from tqdm import tqdm
11
+ from dataset import get_loader
12
+ from loss import *
13
+ from config import Config
14
+ from evaluation.dataloader import EvalDataset
15
+ from evaluation.evaluator import Eval_thread
16
+
17
+
18
+ from models.main import *
19
+
20
+ import torch.nn.functional as F
21
+ import pytorch_toolbelt.losses as PTL
22
+
23
# Module-level training setup: CLI parsing, data loaders, model, optimizer
# and loss are all built at import time and used as globals by main()/train()
# /validate() below.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
# Parameter from command line
parser = argparse.ArgumentParser(description='')

parser.add_argument('--loss',
                    default='Scale_IoU',
                    type=str,
                    help="Options: '', ''")
parser.add_argument('--bs', '--batch_size', default=1, type=int)
parser.add_argument('--lr',
                    '--learning_rate',
                    default=1e-4,
                    type=float,
                    help='Initial learning rate')
parser.add_argument('--resume',
                    default=None,
                    type=str,
                    help='path to latest checkpoint')
parser.add_argument('--epochs', default=200, type=int)
parser.add_argument('--start_epoch',
                    default=0,
                    type=int,
                    help='manual epoch number (useful on restarts)')
parser.add_argument('--trainset',
                    default='CoCo',
                    type=str,
                    help="Options: 'CoCo'")
parser.add_argument('--testsets',
                    default='CoCA',
                    type=str,
                    help="Options: 'CoCA','CoSal2015','CoSOD3k','iCoseg','MSRC'")
parser.add_argument('--size',
                    default=224,
                    type=int,
                    help='input size')
parser.add_argument('--tmp', default='/data1/dcfm/temp', help='Temporary folder')
parser.add_argument('--save_root', default='./CoSODmaps/pred', type=str, help='Output folder')

args = parser.parse_args()
config = Config()

# Prepare dataset
if args.trainset == 'CoCo':
    train_img_path = './data/CoCo/img/'
    train_gt_path = './data/CoCo/gt/'
    # max_num caps the group size per co-saliency batch.
    # NOTE(review): shuffle=False for a *training* loader is unusual —
    # presumably shuffling happens inside get_loader; confirm.
    train_loader = get_loader(train_img_path,
                              train_gt_path,
                              args.size,
                              args.bs,
                              max_num=16, #20,
                              istrain=True,
                              shuffle=False,
                              num_workers=8, #4,
                              pin=True)

else:
    print('Unkonwn train dataset')
    # NOTE(review): `args` has no `dataset` attribute — this line would raise
    # AttributeError if an unknown trainset were ever passed.
    print(args.dataset)

# Build the validation loader (only 'CoCA' is iterated here, so `test_loader`
# ends up bound to the CoCA loader used by validate()).
for testset in ['CoCA']:
    if testset == 'CoCA':
        test_img_path = './data/images/CoCA/'
        test_gt_path = './data/gts/CoCA/'

        saved_root = os.path.join(args.save_root, 'CoCA')
    elif testset == 'CoSOD3k':
        test_img_path = './data/images/CoSOD3k/'
        test_gt_path = './data/gts/CoSOD3k/'
        saved_root = os.path.join(args.save_root, 'CoSOD3k')
    elif testset == 'CoSal2015':
        test_img_path = './data/images/CoSal2015/'
        test_gt_path = './data/gts/CoSal2015/'
        saved_root = os.path.join(args.save_root, 'CoSal2015')
    elif testset == 'CoCo':
        test_img_path = './data/images/CoCo/'
        test_gt_path = './data/gts/CoCo/'
        saved_root = os.path.join(args.save_root, 'CoCo')
    else:
        print('Unkonwn test dataset')
        print(args.dataset)

    test_loader = get_loader(
        test_img_path, test_gt_path, args.size, 1, istrain=False, shuffle=False, num_workers=8, pin=True)

# make dir for tmp
os.makedirs(args.tmp, exist_ok=True)

# Init log file
logger = Logger(os.path.join(args.tmp, "log.txt"))
set_seed(123)

# Init model
device = torch.device("cuda")

model = DCFM()
model = model.to(device)
model.apply(weights_init)

# Load ImageNet-pretrained VGG16 weights into the backbone.
model.dcfmnet.backbone._initialize_weights(torch.load('./models/vgg16-397923af.pth'))

# Split parameters into backbone vs. the rest so the pretrained backbone can
# train with a 10x smaller learning rate.
backbone_params = list(map(id, model.dcfmnet.backbone.parameters()))
base_params = filter(lambda p: id(p) not in backbone_params,
                     model.dcfmnet.parameters())

all_params = [{'params': base_params}, {'params': model.dcfmnet.backbone.parameters(), 'lr': args.lr*0.1}]

# Setting optimizer
optimizer = optim.Adam(params=all_params,lr=args.lr, weight_decay=1e-4, betas=[0.9, 0.99])

# Freeze the whole backbone except its last conv block (conv5_3).
for key, value in model.named_parameters():
    if 'dcfmnet.backbone' in key and 'dcfmnet.backbone.conv5.conv5_3' not in key:
        value.requires_grad = False

# Print the trainable/frozen status of every parameter for inspection.
for key, value in model.named_parameters():
    print(key, value.requires_grad)

# log model and optimizer pars
logger.info("Model details:")
logger.info(model)
logger.info("Optimizer details:")
logger.info(optimizer)
logger.info("Scheduler details:")
# logger.info(scheduler)
logger.info("Other hyperparameters:")
logger.info(args)

# Setting Loss
# Instantiate the loss class named by --loss from the loss module.
# NOTE(review): exec/eval on a CLI string — fine for a research script, but
# any unexpected --loss value executes arbitrary code.
exec('from loss import ' + args.loss)
IOUloss = eval(args.loss+'()')
152
+
153
+
154
def main():
    """Top-level training loop: optional resume, per-epoch train + validate,
    checkpointing, and retention of the best-S-measure weights."""
    val_measures = []
    # Optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            logger.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.dcfmnet.load_state_dict(checkpoint['state_dict'])
            # NOTE(review): the checkpoints saved below contain only
            # 'epoch' and 'state_dict' — resuming from one of those would
            # KeyError on 'optimizer' here; confirm the intended source.
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            logger.info("=> no checkpoint found at '{}'".format(args.resume))

    print(args.epochs)
    for epoch in range(args.start_epoch, args.epochs):
        train_loss = train(epoch)
        if config.validation:
            # Validate on the first test set; measures[0] is the S-measure.
            measures = validate(model, test_loader, args.testsets)
            val_measures.append(measures)
            print(
                'Validation: S_measure on CoCA for epoch-{} is {:.4f}. Best epoch is epoch-{} with S_measure {:.4f}'.format(
                    epoch, measures[0], np.argmax(np.array(val_measures)[:, 0].squeeze()),
                    np.max(np.array(val_measures)[:, 0]))
            )
        # Save checkpoint
        # Rolling "latest" checkpoint (epoch + weights only).
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.dcfmnet.state_dict(),
                #'scheduler': scheduler.state_dict(),
            },
            path=args.tmp)
        if config.validation:
            # If this epoch is the best so far, replace any previous
            # 'best_*' weight file with the new one.
            if np.max(np.array(val_measures)[:, 0].squeeze()) == measures[0]:
                best_weights_before = [os.path.join(args.tmp, weight_file) for weight_file in
                                       os.listdir(args.tmp) if 'best_' in weight_file]
                for best_weight_before in best_weights_before:
                    os.remove(best_weight_before)
                torch.save(model.dcfmnet.state_dict(),
                           os.path.join(args.tmp, 'best_ep{}_Smeasure{:.4f}.pth'.format(epoch, measures[0])))
        # Periodic snapshots every 10 epochs (and after the first epoch).
        if (epoch + 1) % 10 == 0 or epoch == 0:
            torch.save(model.dcfmnet.state_dict(), args.tmp + '/model-' + str(epoch + 1) + '.pt')

        # Keep every epoch near the end of the default 200-epoch schedule.
        if epoch > 188:
            torch.save(model.dcfmnet.state_dict(), args.tmp+'/model-' + str(epoch + 1) + '.pt')
    #dcfmnet_dict = model.dcfmnet.state_dict()
    #torch.save(dcfmnet_dict, os.path.join(args.tmp, 'final.pth'))
203
+
204
def sclloss(x, xt, xb):
    """Contrastive prototype loss: rewards cosine similarity between `x` and
    the target prototype `xt` while penalizing similarity to the background
    prototype `xb`. Returns the summed loss over all elements."""
    eps = 1e-5
    # Map cosine similarity from [-1, 1] into [0, 1] for both pairs.
    sim_target = (1 + compute_cos_dis(x, xt)) * 0.5
    sim_background = (1 + compute_cos_dis(x, xb)) * 0.5
    # High target similarity and low background similarity both lower the loss.
    per_elem = -torch.log(sim_target + eps) - torch.log(1 - sim_background + eps)
    return per_elem.sum()
209
+
210
def train(epoch):
    """Train the global `model` for one epoch over `train_loader`.

    Combines the configured IoU loss with 0.1x the prototype contrastive
    loss (sclloss). Returns the summed IoU loss over the epoch.
    """
    # Switch to train mode
    model.train()
    model.set_mode('train')
    loss_sum = 0.0
    loss_sumkl = 0.0  # NOTE(review): accumulated nowhere — appears unused.
    for batch_idx, batch in enumerate(train_loader):
        # Batch dim 0 is the group; squeeze it so the group acts as the batch.
        inputs = batch[0].to(device).squeeze(0)
        gts = batch[1].to(device).squeeze(0)
        # Forward pass returns the prediction plus three prototype tensors
        # (sample / ground-truth / background) for the contrastive term.
        pred, proto, protogt, protobg = model(inputs, gts)
        loss_iou = IOUloss(pred, gts)
        loss_scl = sclloss(proto, protogt, protobg)
        # Contrastive term weighted at 0.1 relative to the IoU loss.
        loss = loss_iou+0.1*loss_scl
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Only the IoU component is tracked for the epoch total.
        loss_sum = loss_sum + loss_iou.detach().item()

        if batch_idx % 20 == 0:
            logger.info('Epoch[{0}/{1}] Iter[{2}/{3}] '
                        'Train Loss: loss_iou: {4:.3f}, loss_scl: {5:.3f} '.format(
                            epoch,
                            args.epochs,
                            batch_idx,
                            len(train_loader),
                            loss_iou,
                            loss_scl,
                        ))
    # NOTE(review): loss_mean is computed but the *sum* is returned —
    # presumably intentional since callers only log it; confirm.
    loss_mean = loss_sum / len(train_loader)
    return loss_sum
240
+
241
+
242
def validate(model, test_loaders, testsets):
    """Run inference on the first validation set, save the predictions, and
    return a one-element list with the resulting S-measure.

    Args:
        model: The DCFM wrapper in eval mode capable of `model(inputs, gts)`.
        test_loaders: The validation data loader to iterate.
        testsets: '+'-separated test-set names; only the first is validated.

    Returns:
        list[float]: `[s_measure]` for the first test set.
    """
    model.eval()

    testsets = testsets.split('+')
    measures = []
    # Only the first test set is validated each epoch (cheap validation).
    for testset in testsets[:1]:
        print('Validating {}...'.format(testset))

        saved_root = os.path.join(args.save_root, testset)

        # BUGFIX: iterate the loader passed in by the caller — the original
        # ignored this parameter and read the module-level `test_loader`
        # global (which happened to be the same object for the one caller).
        for batch in test_loaders:
            inputs = batch[0].to(device).squeeze(0)
            gts = batch[1].to(device).squeeze(0)
            subpaths = batch[2]
            ori_sizes = batch[3]
            with torch.no_grad():
                # Final-stage prediction -> probabilities.
                scaled_preds = model(inputs, gts)[-1].sigmoid()

            os.makedirs(os.path.join(saved_root, subpaths[0][0].split('/')[0]), exist_ok=True)

            num = len(scaled_preds)
            for inum in range(num):
                subpath = subpaths[inum][0]
                ori_size = (ori_sizes[inum][0].item(), ori_sizes[inum][1].item())
                # Resize each prediction back to its source resolution.
                res = nn.functional.interpolate(scaled_preds[inum].unsqueeze(0), size=ori_size, mode='bilinear',
                                                align_corners=True)
                save_tensor_img(res, os.path.join(saved_root, subpath))

        # Evaluate the saved predictions against the ground truth on disk.
        eval_loader = EvalDataset(
            saved_root,  # preds
            os.path.join('./data/gts', testset)  # GT
        )
        evaler = Eval_thread(eval_loader, cuda=True)
        # Use S_measure for validation
        s_measure = evaler.Eval_Smeasure()
        # The trailing `and 0` deliberately disables the expensive extra
        # measures; remove it to re-enable them above the threshold.
        if s_measure > config.val_measures['Smeasure']['CoCA'] and 0:
            # TODO: evluate others measures if s_measure is very high.
            e_max = evaler.Eval_Emeasure().max().item()
            f_max = evaler.Eval_fmeasure().max().item()
            # BUGFIX: the original spec '{:4.f}' is invalid and would raise
            # ValueError the moment this branch ran; '{:.4f}' is intended.
            print('Emax: {:.4f}, Fmax: {:.4f}'.format(e_max, f_max))
        measures.append(s_measure)

    # Restore training mode for the caller's next epoch.
    model.train()
    return measures
287
+
288
# Entry point: all setup above runs at import time; main() runs the loop.
if __name__ == '__main__':
    main()
train_wandb.ipynb ADDED
@@ -0,0 +1,459 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "/home/wej36how/.conda/envs/vit/lib/python3.8/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
13
+ " from .autonotebook import tqdm as notebook_tqdm\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "import torch\n",
19
+ "from torch.utils.data import DataLoader\n",
20
+ "from transformers import AdamW, ViTImageProcessor, ViTForImageClassification\n",
21
+ "from NWRD_dataset import NWRD\n",
22
+ "from tqdm import tqdm\n",
23
+ "import numpy as np\n",
24
+ "import torch.nn.functional as F\n",
25
+ "import os\n",
26
+ "import torch.optim as optim\n",
27
+ "from torchvision import transforms\n"
28
+ ]
29
+ },
30
+ {
31
+ "cell_type": "code",
32
+ "execution_count": 2,
33
+ "metadata": {},
34
+ "outputs": [],
35
+ "source": [
36
+ "seed = 42\n",
37
+ "torch.manual_seed(seed)\n",
38
+ "np.random.seed(seed)\n",
39
+ "# If you are using CUDA, set this for further deterministic behavior\n",
40
+ "if torch.cuda.is_available():\n",
41
+ " torch.cuda.manual_seed(seed)\n",
42
+ " torch.cuda.manual_seed_all(seed) # if you are using multi-GPU.\n",
43
+ " # Below settings are recommended for deterministic behavior when using specific convolution operations,\n",
44
+ " # but may reduce performance\n",
45
+ " torch.backends.cudnn.deterministic = True\n",
46
+ " torch.backends.cudnn.benchmark = False"
47
+ ]
48
+ },
49
+ {
50
+ "cell_type": "code",
51
+ "execution_count": 3,
52
+ "metadata": {},
53
+ "outputs": [
54
+ {
55
+ "name": "stdout",
56
+ "output_type": "stream",
57
+ "text": [
58
+ "cpu\n"
59
+ ]
60
+ }
61
+ ],
62
+ "source": [
63
+ "device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')\n",
64
+ "CUDA_LAUNCH_BLOCKING=1\n",
65
+ "TORCH_USE_CUDA_DSA=1\n",
66
+ "print(device)"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "code",
71
+ "execution_count": 4,
72
+ "metadata": {},
73
+ "outputs": [],
74
+ "source": [
75
+ "transformations = transforms.Compose([\n",
76
+ " transforms.Resize((224, 224)), # Resize the image to 224x224\n",
77
+ " transforms.ToTensor() # Convert the image to a PyTorch tensor\n",
78
+ "])"
79
+ ]
80
+ },
81
+ {
82
+ "cell_type": "code",
83
+ "execution_count": 5,
84
+ "metadata": {},
85
+ "outputs": [
86
+ {
87
+ "ename": "FileNotFoundError",
88
+ "evalue": "[Errno 2] No such file or directory: 'C:\\\\Users\\\\hasee\\\\Desktop\\\\Germany_2024\\\\Dataset\\\\NWRDprocessed\\\\train\\\\calssification/rust'",
89
+ "output_type": "error",
90
+ "traceback": [
91
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
92
+ "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
93
+ "Cell \u001b[0;32mIn[5], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m train_ds \u001b[38;5;241m=\u001b[39m \u001b[43mNWRD\u001b[49m\u001b[43m(\u001b[49m\u001b[43mroot_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mC:\u001b[39;49m\u001b[38;5;130;43;01m\\\\\u001b[39;49;00m\u001b[38;5;124;43mUsers\u001b[39;49m\u001b[38;5;130;43;01m\\\\\u001b[39;49;00m\u001b[38;5;124;43mhasee\u001b[39;49m\u001b[38;5;130;43;01m\\\\\u001b[39;49;00m\u001b[38;5;124;43mDesktop\u001b[39;49m\u001b[38;5;130;43;01m\\\\\u001b[39;49;00m\u001b[38;5;124;43mGermany_2024\u001b[39;49m\u001b[38;5;130;43;01m\\\\\u001b[39;49;00m\u001b[38;5;124;43mDataset\u001b[39;49m\u001b[38;5;130;43;01m\\\\\u001b[39;49;00m\u001b[38;5;124;43mNWRDprocessed\u001b[39;49m\u001b[38;5;130;43;01m\\\\\u001b[39;49;00m\u001b[38;5;124;43mtrain\u001b[39;49m\u001b[38;5;130;43;01m\\\\\u001b[39;49;00m\u001b[38;5;124;43mcalssification\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtrain\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtransform\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtransformations\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m val_ds \u001b[38;5;241m=\u001b[39m 
NWRD(root_dir\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mC:\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mUsers\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mhasee\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mDesktop\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mGermany_2024\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mDataset\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mNWRDprocessed\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mval\u001b[39m\u001b[38;5;130;01m\\\\\u001b[39;00m\u001b[38;5;124mcalssification\u001b[39m\u001b[38;5;124m\"\u001b[39m, train\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, transform\u001b[38;5;241m=\u001b[39mtransformations)\n\u001b[1;32m 4\u001b[0m train_loader \u001b[38;5;241m=\u001b[39m DataLoader(train_ds, batch_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m8\u001b[39m, shuffle\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n",
94
+ "File \u001b[0;32m~/codes/crossvit/NWRD_dataset.py:12\u001b[0m, in \u001b[0;36mNWRD.__init__\u001b[0;34m(self, root_dir, transform, train)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mimages \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlabels \u001b[38;5;241m=\u001b[39m []\n\u001b[0;32m---> 12\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
95
+ "File \u001b[0;32m~/codes/crossvit/NWRD_dataset.py:19\u001b[0m, in \u001b[0;36mNWRD.load_data\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 16\u001b[0m non_rust_dir \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mroot_dir, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnon_rust\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 18\u001b[0m \u001b[38;5;66;03m# Load rust images\u001b[39;00m\n\u001b[0;32m---> 19\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m filename \u001b[38;5;129;01min\u001b[39;00m \u001b[43mos\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlistdir\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrust_dir\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m 20\u001b[0m filepath \u001b[38;5;241m=\u001b[39m os\u001b[38;5;241m.\u001b[39mpath\u001b[38;5;241m.\u001b[39mjoin(rust_dir, filename)\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mimages\u001b[38;5;241m.\u001b[39mappend(filepath)\n",
96
+ "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'C:\\\\Users\\\\hasee\\\\Desktop\\\\Germany_2024\\\\Dataset\\\\NWRDprocessed\\\\train\\\\calssification/rust'"
97
+ ]
98
+ }
99
+ ],
100
+ "source": [
101
+ "train_ds = NWRD(root_dir=\"C:\\\\Users\\\\hasee\\\\Desktop\\\\Germany_2024\\\\Dataset\\\\NWRDprocessed\\\\train\\\\calssification\", train=True, transform=transformations)\n",
102
+ "val_ds = NWRD(root_dir=\"C:\\\\Users\\\\hasee\\\\Desktop\\\\Germany_2024\\\\Dataset\\\\NWRDprocessed\\\\val\\\\calssification\", train=False, transform=transformations)\n",
103
+ " \n",
104
+ "train_loader = DataLoader(train_ds, batch_size=8, shuffle=True)\n",
105
+ "val_loader = DataLoader(val_ds, batch_size=8, shuffle=True)"
106
+ ]
107
+ },
108
+ {
109
+ "cell_type": "code",
110
+ "execution_count": 6,
111
+ "metadata": {},
112
+ "outputs": [],
113
+ "source": [
114
+ "mean = [0.485, 0.456, 0.406] # Mean values for RGB channels\n",
115
+ "std = [0.229, 0.224, 0.225] # Standard deviation values for RGB channels\n",
116
+ "#processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224',transform={'mean': mean, 'std': std})\n",
117
+ "processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')\n",
118
+ "model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')\n",
119
+ "# processor.image_mean=mean\n",
120
+ "# processor.image_std=std\n",
121
+ "#print(processor)"
122
+ ]
123
+ },
124
+ {
125
+ "cell_type": "code",
126
+ "execution_count": 8,
127
+ "metadata": {},
128
+ "outputs": [
129
+ {
130
+ "data": {
131
+ "text/plain": [
132
+ "ViTForImageClassification(\n",
133
+ " (vit): ViTModel(\n",
134
+ " (embeddings): ViTEmbeddings(\n",
135
+ " (patch_embeddings): ViTPatchEmbeddings(\n",
136
+ " (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))\n",
137
+ " )\n",
138
+ " (dropout): Dropout(p=0.0, inplace=False)\n",
139
+ " )\n",
140
+ " (encoder): ViTEncoder(\n",
141
+ " (layer): ModuleList(\n",
142
+ " (0-11): 12 x ViTLayer(\n",
143
+ " (attention): ViTSdpaAttention(\n",
144
+ " (attention): ViTSdpaSelfAttention(\n",
145
+ " (query): Linear(in_features=768, out_features=768, bias=True)\n",
146
+ " (key): Linear(in_features=768, out_features=768, bias=True)\n",
147
+ " (value): Linear(in_features=768, out_features=768, bias=True)\n",
148
+ " (dropout): Dropout(p=0.0, inplace=False)\n",
149
+ " )\n",
150
+ " (output): ViTSelfOutput(\n",
151
+ " (dense): Linear(in_features=768, out_features=768, bias=True)\n",
152
+ " (dropout): Dropout(p=0.0, inplace=False)\n",
153
+ " )\n",
154
+ " )\n",
155
+ " (intermediate): ViTIntermediate(\n",
156
+ " (dense): Linear(in_features=768, out_features=3072, bias=True)\n",
157
+ " (intermediate_act_fn): GELUActivation()\n",
158
+ " )\n",
159
+ " (output): ViTOutput(\n",
160
+ " (dense): Linear(in_features=3072, out_features=768, bias=True)\n",
161
+ " (dropout): Dropout(p=0.0, inplace=False)\n",
162
+ " )\n",
163
+ " (layernorm_before): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
164
+ " (layernorm_after): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
165
+ " )\n",
166
+ " )\n",
167
+ " )\n",
168
+ " (layernorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)\n",
169
+ " )\n",
170
+ " (classifier): Linear(in_features=768, out_features=2, bias=True)\n",
171
+ ")"
172
+ ]
173
+ },
174
+ "execution_count": 8,
175
+ "metadata": {},
176
+ "output_type": "execute_result"
177
+ }
178
+ ],
179
+ "source": [
180
+ "model.classifier = torch.nn.Linear(model.config.hidden_size, 2)\n",
181
+ "model.to(device)"
182
+ ]
183
+ },
184
+ {
185
+ "cell_type": "markdown",
186
+ "metadata": {},
187
+ "source": [
188
+ "Finetuning of the model based on pretraining weights."
189
+ ]
190
+ },
191
+ {
192
+ "cell_type": "code",
193
+ "execution_count": 8,
194
+ "metadata": {},
195
+ "outputs": [],
196
+ "source": [
197
+ "# model_weights = torch.load('/home/Hirra/coding_files/crossvit/weights/wandb_vit_base_final_med_val_NWRD_epoch_50_lr_0.000000001_wd_0.001_batch_size_8_unaugmented_unequlaized/49.pth')\n",
198
+ "# model.load_state_dict(model_weights.state_dict())"
199
+ ]
200
+ },
201
+ {
202
+ "cell_type": "code",
203
+ "execution_count": 9,
204
+ "metadata": {},
205
+ "outputs": [],
206
+ "source": [
207
+ "optimizer = optim.SGD(model.parameters(), lr=0.00000003, weight_decay=0.001)\n",
208
+ "criterion = torch.nn.CrossEntropyLoss()\n",
209
+ "weights_directory = 'wandb_vit_base_final_for_time_NWRD_epoch_50_lr_0.000000003_wd_0.001_batch_size_8_unaugmented_training'\n",
210
+ "weight_loc = f\"weights/{weights_directory}\"\n",
211
+ "os.makedirs(weight_loc, exist_ok=True)"
212
+ ]
213
+ },
214
+ {
215
+ "cell_type": "code",
216
+ "execution_count": 10,
217
+ "metadata": {},
218
+ "outputs": [
219
+ {
220
+ "name": "stderr",
221
+ "output_type": "stream",
222
+ "text": [
223
+ "Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.\n",
224
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mgptautomated\u001b[0m (\u001b[33mtukl_labwork\u001b[0m). Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
225
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m If you're specifying your api key in code, ensure this code is not shared publicly.\n",
226
+ "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m Consider setting the WANDB_API_KEY environment variable, or running `wandb login` from the command line.\n",
227
+ "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: C:\\Users\\hasee\\.netrc\n"
228
+ ]
229
+ },
230
+ {
231
+ "data": {
232
+ "text/plain": [
233
+ "True"
234
+ ]
235
+ },
236
+ "execution_count": 10,
237
+ "metadata": {},
238
+ "output_type": "execute_result"
239
+ }
240
+ ],
241
+ "source": [
242
+ "import wandb, os\n",
243
+ "#wandb.login()\n",
244
+    "wandb.login(key=os.environ[\"WANDB_API_KEY\"])  # SECURITY: API key read from env; never commit keys\n",
245
+    "# NOTE(review): a hardcoded wandb API key was previously committed here and is now public — revoke it."
246
+ ]
247
+ },
248
+ {
249
+ "cell_type": "code",
250
+ "execution_count": 11,
251
+ "metadata": {},
252
+ "outputs": [
253
+ {
254
+ "name": "stdout",
255
+ "output_type": "stream",
256
+ "text": [
257
+ "env: WANDB_PROJECT=crossvit_rust_classifier_new\n"
258
+ ]
259
+ }
260
+ ],
261
+ "source": [
262
+ "%env WANDB_PROJECT=crossvit_rust_classifier_new\n",
263
+ "os.environ[\"WANDB_PROJECT\"] = \"<crossvit>\"\n",
264
+ "os.environ[\"WANDB_REPORT_TO\"] = \"wandb\""
265
+ ]
266
+ },
267
+ {
268
+ "cell_type": "code",
269
+ "execution_count": 12,
270
+ "metadata": {},
271
+ "outputs": [
272
+ {
273
+ "data": {
274
+ "text/html": [
275
+ "Changes to your `wandb` environment variables will be ignored because your `wandb` session has already started. For more information on how to modify your settings with `wandb.init()` arguments, please refer to <a href='https://wandb.me/wandb-init' target=\"_blank\">the W&B docs</a>."
276
+ ],
277
+ "text/plain": [
278
+ "<IPython.core.display.HTML object>"
279
+ ]
280
+ },
281
+ "metadata": {},
282
+ "output_type": "display_data"
283
+ },
284
+ {
285
+ "data": {
286
+ "text/html": [
287
+ "wandb version 0.17.3 is available! To upgrade, please run:\n",
288
+ " $ pip install wandb --upgrade"
289
+ ],
290
+ "text/plain": [
291
+ "<IPython.core.display.HTML object>"
292
+ ]
293
+ },
294
+ "metadata": {},
295
+ "output_type": "display_data"
296
+ },
297
+ {
298
+ "data": {
299
+ "text/html": [
300
+ "Tracking run with wandb version 0.17.2"
301
+ ],
302
+ "text/plain": [
303
+ "<IPython.core.display.HTML object>"
304
+ ]
305
+ },
306
+ "metadata": {},
307
+ "output_type": "display_data"
308
+ },
309
+ {
310
+ "data": {
311
+ "text/html": [
312
+ "Run data is saved locally in <code>c:\\Users\\hasee\\Desktop\\Germany_2024\\codes\\crossvit\\wandb\\run-20240626_161631-bgtm3oyt</code>"
313
+ ],
314
+ "text/plain": [
315
+ "<IPython.core.display.HTML object>"
316
+ ]
317
+ },
318
+ "metadata": {},
319
+ "output_type": "display_data"
320
+ },
321
+ {
322
+ "data": {
323
+ "text/html": [
324
+ "Syncing run <strong><a href='https://wandb.ai/tukl_labwork/uncategorized/runs/bgtm3oyt' target=\"_blank\">glamorous-wood-74</a></strong> to <a href='https://wandb.ai/tukl_labwork/uncategorized' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
325
+ ],
326
+ "text/plain": [
327
+ "<IPython.core.display.HTML object>"
328
+ ]
329
+ },
330
+ "metadata": {},
331
+ "output_type": "display_data"
332
+ },
333
+ {
334
+ "data": {
335
+ "text/html": [
336
+ " View project at <a href='https://wandb.ai/tukl_labwork/uncategorized' target=\"_blank\">https://wandb.ai/tukl_labwork/uncategorized</a>"
337
+ ],
338
+ "text/plain": [
339
+ "<IPython.core.display.HTML object>"
340
+ ]
341
+ },
342
+ "metadata": {},
343
+ "output_type": "display_data"
344
+ },
345
+ {
346
+ "data": {
347
+ "text/html": [
348
+ " View run at <a href='https://wandb.ai/tukl_labwork/uncategorized/runs/bgtm3oyt' target=\"_blank\">https://wandb.ai/tukl_labwork/uncategorized/runs/bgtm3oyt</a>"
349
+ ],
350
+ "text/plain": [
351
+ "<IPython.core.display.HTML object>"
352
+ ]
353
+ },
354
+ "metadata": {},
355
+ "output_type": "display_data"
356
+ },
357
+ {
358
+ "name": "stderr",
359
+ "output_type": "stream",
360
+ "text": [
361
+ " 0%| | 0/241 [00:00<?, ?it/s]c:\\Users\\hasee\\miniconda3\\envs\\segformer\\Lib\\site-packages\\transformers\\models\\vit\\modeling_vit.py:253: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at C:\\cb\\pytorch_1000000000000\\work\\aten\\src\\ATen\\native\\transformers\\cuda\\sdp_utils.cpp:455.)\n",
362
+ " context_layer = torch.nn.functional.scaled_dot_product_attention(\n",
363
+ "Epoch 0 train Loss 0.6551: 21%|██ | 51/241 [00:27<01:42, 1.85it/s]\n"
364
+ ]
365
+ },
366
+ {
367
+ "ename": "KeyboardInterrupt",
368
+ "evalue": "",
369
+ "output_type": "error",
370
+ "traceback": [
371
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
372
+ "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
373
+ "Cell \u001b[1;32mIn[12], line 22\u001b[0m\n\u001b[0;32m 17\u001b[0m \u001b[38;5;66;03m# print(\"logits\", logits)\u001b[39;00m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;66;03m# print(\"prediction\", predication)\u001b[39;00m\n\u001b[0;32m 19\u001b[0m \u001b[38;5;66;03m# print(\"labels\", labels)\u001b[39;00m\n\u001b[0;32m 21\u001b[0m loss \u001b[38;5;241m=\u001b[39m criterion(logits, labels)\n\u001b[1;32m---> 22\u001b[0m train_losses\u001b[38;5;241m.\u001b[39mappend(\u001b[43mloss\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mitem\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 23\u001b[0m loss\u001b[38;5;241m.\u001b[39mbackward()\n\u001b[0;32m 24\u001b[0m optimizer\u001b[38;5;241m.\u001b[39mstep()\n",
374
+ "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
375
+ ]
376
+ }
377
+ ],
378
+ "source": [
379
+ "wandb.init()\n",
380
+ "\n",
381
+ "best_epoch = {}\n",
382
+ "train_losses = []\n",
383
+ "for epoch in range(50):\n",
384
+ " model.train\n",
385
+ " train_losses=[]\n",
386
+ " loop = tqdm(enumerate(train_loader), total=len(train_loader))\n",
387
+ " for batch_idx, (images, labels) in loop:\n",
388
+ " inputs = processor(images=images, return_tensors=\"pt\", do_rescale=False).to(device)\n",
389
+ " labels = labels.to(device)\n",
390
+ "\n",
391
+ " outputs = model(**inputs)\n",
392
+ " logits = outputs.logits\n",
393
+ " predication = logits.argmax(axis=1)\n",
394
+ " \n",
395
+ " # print(\"logits\", logits)\n",
396
+ " # print(\"prediction\", predication)\n",
397
+ " # print(\"labels\", labels)\n",
398
+ " \n",
399
+ " loss = criterion(logits, labels)\n",
400
+ " train_losses.append(loss.item())\n",
401
+ " loss.backward()\n",
402
+ " optimizer.step()\n",
403
+ " loop.set_description(f\"Epoch {epoch} train Loss {np.mean(train_losses):.4f}\")\n",
404
+ "\n",
405
+ "\n",
406
+ " print(\"Epoch \"+str(epoch)+\" Train Loss \"+str(np.mean(train_losses)))\n",
407
+ " torch.save(model, weight_loc+'/{}.pth'.format(epoch))\n",
408
+ " wandb.log({\"train_loss\": np.mean(train_losses), \"epoch\": epoch})\n",
409
+ "\n",
410
+ " #validation\n",
411
+ " optimizer.zero_grad()\n",
412
+ " model.eval\n",
413
+ " val_losses=[]\n",
414
+ "\n",
415
+ " loop = tqdm(enumerate(val_loader), total=len(val_loader))\n",
416
+ " with torch.no_grad():\n",
417
+ " for batch_idx, (images, labels) in loop:\n",
418
+ " inputs = processor(images=images, return_tensors=\"pt\", do_rescale=False).to(device)\n",
419
+ " labels = labels.to(device)\n",
420
+ "\n",
421
+ " outputs = model(**inputs)\n",
422
+ " logits = outputs.logits\n",
423
+ " \n",
424
+ " loss = criterion(logits, labels)\n",
425
+ " val_losses.append(loss.item())\n",
426
+ "\n",
427
+ " predication = logits.argmax(axis=1)\n",
428
+ "\n",
429
+ " loss = criterion(logits, labels)\n",
430
+ " val_losses.append(loss.item())\n",
431
+ " \n",
432
+ " loop.set_description(f\"Epoch {epoch} Val Loss {np.mean(val_losses):.4f}\")\n",
433
+ " wandb.log({\"val_loss\": np.mean(val_losses), \"epoch\": epoch})\n",
434
+ "torch.cuda.empty_cache()\n"
435
+ ]
436
+ }
437
+ ],
438
+ "metadata": {
439
+ "kernelspec": {
440
+ "display_name": "crossvit",
441
+ "language": "python",
442
+ "name": "python3"
443
+ },
444
+ "language_info": {
445
+ "codemirror_mode": {
446
+ "name": "ipython",
447
+ "version": 3
448
+ },
449
+ "file_extension": ".py",
450
+ "mimetype": "text/x-python",
451
+ "name": "python",
452
+ "nbconvert_exporter": "python",
453
+ "pygments_lexer": "ipython3",
454
+ "version": "3.8.19"
455
+ }
456
+ },
457
+ "nbformat": 4,
458
+ "nbformat_minor": 2
459
+ }
util.cpython-38.pyc ADDED
Binary file (3.5 kB). View file
 
util.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import torch
4
+ import shutil
5
+ from torchvision import transforms
6
+ import numpy as np
7
+ import random
8
+ import cv2
9
+
10
+
11
class Logger():
    """Thin wrapper around the shared 'DCFM' logger that mirrors
    INFO-level messages to both stdout and a log file."""

    _FORMAT = '%(asctime)s %(levelname)s %(message)s'

    def __init__(self, path="log.txt"):
        formatter = logging.Formatter(self._FORMAT)
        self.file_handler = logging.FileHandler(path, "w")
        self.file_handler.setFormatter(formatter)
        self.stdout_handler = logging.StreamHandler()
        self.stdout_handler.setFormatter(formatter)
        self.logger = logging.getLogger('DCFM')
        for handler in (self.file_handler, self.stdout_handler):
            self.logger.addHandler(handler)
        self.logger.setLevel(logging.INFO)
        self.logger.propagate = False  # keep messages out of the root logger

    def info(self, txt):
        """Log *txt* at INFO level to both sinks."""
        self.logger.info(txt)

    def close(self):
        """Close both handlers (they remain attached to the logger)."""
        self.file_handler.close()
        self.stdout_handler.close()
29
+
30
class AverageMeter(object):
    """Tracks the most recent value plus a running sum, count, and mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = 0.0
        self.avg = 0.0
        self.sum = 0.0
        self.count = 0.0

    def update(self, val, n=1):
        """Record *val* observed *n* times and refresh the running mean."""
        self.val = val
        self.count = self.count + n
        self.sum = self.sum + val * n
        self.avg = self.sum / self.count
46
+
47
+
48
def save_checkpoint(state, path, filename="checkpoint.pth"):
    """Serialize *state* with ``torch.save`` into ``path``/``filename``."""
    target = os.path.join(path, filename)
    torch.save(state, target)
50
+
51
+
52
def save_tensor_img(tenor_im, path):
    """Write an image tensor to *path* as an image file.

    The tensor is moved to CPU, its leading (batch) dimension is dropped,
    and the result goes through torchvision's ToPILImage before saving.
    Assumes a (1, C, H, W) input — TODO confirm with callers.
    """
    cpu_copy = tenor_im.cpu().clone().squeeze(0)
    pil_image = transforms.ToPILImage()(cpu_copy)
    pil_image.save(path)
58
+
59
+
60
def save_tensor_merge(tenor_im, tensor_mask, path, colormap='HOT'):
    # Overlay a colormapped mask on top of an image tensor and write the
    # blend to ``path`` via OpenCV.
    #
    # tenor_im: image tensor, presumably (1, 3, H, W) RGB — TODO confirm
    # tensor_mask: mask tensor, presumably (1, 1, H, W) — TODO confirm
    # colormap: 'HOT', 'PINK', or 'BONE'. NOTE(review): any other value
    #   skips colormapping, leaving the mask single-channel, which would
    #   make cv2.addWeighted below fail on mismatched shapes.
    im = tenor_im.cpu().detach().clone()
    im = im.squeeze(0).numpy()
    # Min-max normalize to [0, 255]; 1e-20 guards against a constant image.
    im = ((im - np.min(im)) / (np.max(im) - np.min(im) + 1e-20)) * 255
    im = np.array(im,np.uint8)
    mask = tensor_mask.cpu().detach().clone()
    mask = mask.squeeze(0).numpy()
    mask = ((mask - np.min(mask)) / (np.max(mask) - np.min(mask) + 1e-20)) * 255
    # Redundant after the normalization above, kept for safety.
    mask = np.clip(mask, 0, 255)
    mask = np.array(mask, np.uint8)
    if colormap == 'HOT':
        mask = cv2.applyColorMap(mask[0,:,:], cv2.COLORMAP_HOT)
    elif colormap == 'PINK':
        mask = cv2.applyColorMap(mask[0,:,:], cv2.COLORMAP_PINK)
    elif colormap == 'BONE':
        mask = cv2.applyColorMap(mask[0,:,:], cv2.COLORMAP_BONE)
    # exec('cv2.applyColorMap(mask[0,:,:], cv2.COLORMAP_' + colormap+')')
    # CHW -> HWC for OpenCV, then RGB -> BGR channel order.
    im = im.transpose((1, 2, 0))
    im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
    # Blend: 30% image + 70% colorized mask.
    mix = cv2.addWeighted(im, 0.3, mask, 0.7, 0)
    cv2.imwrite(path, mix)
81
+
82
+
83
def set_seed(seed):
    """Seed every RNG the project touches (torch CPU/CUDA, numpy, random)
    and force deterministic cuDNN behavior for reproducibility."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # Trade convolution speed for run-to-run reproducibility.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
91
+
92
+
utils.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
utils.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ import torch.nn as nn
4
+ import utils.utils as gen_utils
5
+ import numpy as np
6
+
7
def adjust_rate_poly(cur_iter, max_iter, power=0.9):
    """Polynomial decay factor: (1 - cur_iter/max_iter) ** power."""
    fraction_left = 1.0 - 1.0 * cur_iter / max_iter
    return fraction_left ** power
9
+
10
def adjust_learning_rate_exp(lr, optimizer, iters, decay_rate=0.1, decay_step=25):
    """Step-exponential decay: lr * decay_rate ** (iters // decay_step).

    Each param group is expected to carry an 'lr_mult' multiplier.
    """
    decayed = lr * (decay_rate ** (iters // decay_step))
    for group in optimizer.param_groups:
        group['lr'] = decayed * group['lr_mult']
14
+
15
def adjust_learning_rate_RevGrad(lr, optimizer, max_iter, cur_iter,
                                 alpha=10, beta=0.75):
    """RevGrad/DANN-style schedule: lr / (1 + alpha*p) ** beta where
    p = cur_iter / (max_iter - 1); each group is scaled by its 'lr_mult'."""
    progress = 1.0 * cur_iter / (max_iter - 1)
    scaled = lr / pow(1.0 + alpha * progress, beta)
    for group in optimizer.param_groups:
        group['lr'] = scaled * group['lr_mult']
21
+
22
def adjust_learning_rate_inv(lr, optimizer, iters, alpha=0.001, beta=0.75):
    """Inverse-decay schedule: lr / (1 + alpha*iters) ** beta,
    scaled per param group by its 'lr_mult'."""
    scaled = lr / pow(1.0 + alpha * iters, beta)
    for group in optimizer.param_groups:
        group['lr'] = scaled * group['lr_mult']
26
+
27
def adjust_learning_rate_step(lr, optimizer, iters, steps, beta=0.1):
    """Piecewise-constant decay: multiply lr by *beta* once for every
    milestone in *steps* (ascending) that *iters* has reached or passed;
    each param group is scaled by its 'lr_mult'."""
    passed = 0
    for milestone in steps:
        if iters < milestone:
            break
        passed += 1

    scaled = lr * (beta ** passed)
    for group in optimizer.param_groups:
        group['lr'] = scaled * group['lr_mult']
37
+
38
def adjust_learning_rate_poly(lr, optimizer, iters, max_iter, power=0.9):
    """Polynomial decay: lr * (1 - iters/max_iter) ** power,
    scaled per param group by its 'lr_mult'."""
    scaled = lr * ((1.0 - 1.0 * iters / max_iter) ** power)
    for group in optimizer.param_groups:
        group['lr'] = scaled * group['lr_mult']
42
+
43
def set_param_groups(net, lr_mult_dict={}):
    """Build optimizer param groups from a network's top-level modules.

    Each group carries the 'lr_mult' found in *lr_mult_dict* for that
    module's name, defaulting to 1.0. NOTE: the mutable default is kept
    for signature compatibility; the dict is only read, never mutated.
    """
    if hasattr(net, "module"):  # unwrap DataParallel-style containers
        net = net.module

    params = []
    for name, module in net._modules.items():
        multiplier = lr_mult_dict[name] if name in lr_mult_dict else 1.0
        params.append({'params': module.parameters(), 'lr_mult': multiplier})

    return params
58
+
59
def LSR(x, dim=1, thres=10.0):
    """Entropy-style regularizer on log-probabilities *x*.

    Computes lsr = -mean(x, dim). With thres > 0 each per-sample term is
    reweighted by the detached factor (lsr/thres - 1); otherwise the plain
    mean of lsr is returned.
    """
    lsr = -1.0 * torch.mean(x, dim=dim)
    if thres <= 0.0:
        return torch.mean(lsr)
    weight = (lsr / thres - 1.0).detach()
    return torch.mean(weight * lsr)
65
+
66
def crop(feats, preds, gt, h, w):
    """Randomly sample an h x w grid of spatial positions per sample.

    feats, preds: (N, C, H, W); gt: (N, H, W). For each sample, h rows and
    w columns are drawn without replacement and the same grid is applied to
    all three tensors. Returns (cropped_feats, softmax(preds, dim=1),
    cropped_gt).
    """
    H, W = feats.shape[-2:]
    batch = feats.size(0)
    feat_list, pred_list, gt_list = [], [], []
    for i in range(batch):
        rows = torch.randperm(H)[0:h]
        cols = torch.randperm(W)[0:w]
        feat_list.append(feats[i, :, rows[:, None], cols])
        pred_list.append(preds[i, :, rows[:, None], cols])
        gt_list.append(gt[i, rows[:, None], cols])

    new_feats = torch.stack(feat_list, dim=0)
    new_gt = torch.stack(gt_list, dim=0)
    new_preds = torch.stack(pred_list, dim=0)
    probs = F.softmax(new_preds, dim=1)
    return new_feats, probs, new_gt
vgg.cpython-37.pyc ADDED
Binary file (3.75 kB). View file
 
vgg.cpython-38.pyc ADDED
Binary file (3.78 kB). View file
 
vgg.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import os
4
+
5
+
6
class VGG_Backbone(nn.Module):
    """VGG16 feature backbone used by DCFM.

    ``forward(x)`` returns:
      * ``x1`` — 7x7 adaptively-pooled conv5 features,
      * ``pred_vector`` — the 1000-way VGG classifier output, and
      * ``x2`` — raw conv5 feature maps (before pooling).

    BUGFIX(review): the original ``forward`` referenced ``self.conv4_1`` /
    ``self.conv5_1`` / ``self.conv4_2`` / ``self.conv5_2``, none of which
    were ever created in ``__init__`` (only ``conv4`` and ``conv5`` exist,
    and they are the only blocks ``_initialize_weights`` fills), so every
    forward call raised AttributeError. Both branches now share the
    ``conv4`` / ``conv5`` modules that actually exist.
    """

    # Pooling layers sit at the front of each block (conv2..conv5).
    def __init__(self, pretrained_path="/scratch/wej36how/codes/DCFM-master/vgg16-397923af.pth"):
        super(VGG_Backbone, self).__init__()
        conv1 = nn.Sequential()
        conv1.add_module('conv1_1', nn.Conv2d(3, 64, 3, 1, 1))
        conv1.add_module('relu1_1', nn.ReLU(inplace=True))
        conv1.add_module('conv1_2', nn.Conv2d(64, 64, 3, 1, 1))
        conv1.add_module('relu1_2', nn.ReLU(inplace=True))
        self.conv1 = conv1

        conv2 = nn.Sequential()
        conv2.add_module('pool1', nn.MaxPool2d(2, stride=2))
        conv2.add_module('conv2_1', nn.Conv2d(64, 128, 3, 1, 1))
        conv2.add_module('relu2_1', nn.ReLU())
        conv2.add_module('conv2_2', nn.Conv2d(128, 128, 3, 1, 1))
        conv2.add_module('relu2_2', nn.ReLU())
        self.conv2 = conv2

        conv3 = nn.Sequential()
        conv3.add_module('pool2', nn.MaxPool2d(2, stride=2))
        conv3.add_module('conv3_1', nn.Conv2d(128, 256, 3, 1, 1))
        conv3.add_module('relu3_1', nn.ReLU())
        conv3.add_module('conv3_2', nn.Conv2d(256, 256, 3, 1, 1))
        conv3.add_module('relu3_2', nn.ReLU())
        conv3.add_module('conv3_3', nn.Conv2d(256, 256, 3, 1, 1))
        conv3.add_module('relu3_3', nn.ReLU())
        self.conv3 = conv3

        conv4 = nn.Sequential()
        conv4.add_module('pool3', nn.MaxPool2d(2, stride=2))
        conv4.add_module('conv4_1', nn.Conv2d(256, 512, 3, 1, 1))
        conv4.add_module('relu4_1', nn.ReLU())
        conv4.add_module('conv4_2', nn.Conv2d(512, 512, 3, 1, 1))
        conv4.add_module('relu4_2', nn.ReLU())
        conv4.add_module('conv4_3', nn.Conv2d(512, 512, 3, 1, 1))
        conv4.add_module('relu4_3', nn.ReLU())
        self.conv4 = conv4

        conv5 = nn.Sequential()
        conv5.add_module('pool4', nn.MaxPool2d(2, stride=2))
        conv5.add_module('conv5_1', nn.Conv2d(512, 512, 3, 1, 1))
        conv5.add_module('relu5_1', nn.ReLU())
        conv5.add_module('conv5_2', nn.Conv2d(512, 512, 3, 1, 1))
        conv5.add_module('relu5_2', nn.ReLU())
        conv5.add_module('conv5_3', nn.Conv2d(512, 512, 3, 1, 1))
        conv5.add_module('relu5_3', nn.ReLU())
        self.conv5 = conv5

        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 1000),
        )

        # Torchvision's vgg16-397923af.pth state dict. The path is now a
        # parameter (default preserves the original hard-coded location)
        # so the backbone is not tied to one cluster filesystem.
        # pre_train = torch.load(os.path.dirname(__file__) + '/vgg16-397923af.pth')
        pre_train = torch.load(pretrained_path)
        self._initialize_weights(pre_train)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        # Branch 1: pooled features fed through the VGG classifier head.
        x1 = self.conv4(x)
        x1 = self.conv5(x1)
        x1 = self.avgpool(x1)
        _x1 = x1.view(x1.size(0), -1)
        pred_vector = self.classifier(_x1)

        # Branch 2: raw conv5 feature maps from the same shared weights.
        x2 = self.conv4(x)
        x2 = self.conv5(x2)
        return x1, pred_vector, x2

    def _initialize_weights(self, pre_train):
        # Copy torchvision VGG16 weights by positional key order: the state
        # dict alternates weight/bias for each conv and linear layer.
        keys = list(pre_train.keys())
        self.conv1.conv1_1.weight.data.copy_(pre_train[keys[0]])
        self.conv1.conv1_2.weight.data.copy_(pre_train[keys[2]])
        self.conv2.conv2_1.weight.data.copy_(pre_train[keys[4]])
        self.conv2.conv2_2.weight.data.copy_(pre_train[keys[6]])
        self.conv3.conv3_1.weight.data.copy_(pre_train[keys[8]])
        self.conv3.conv3_2.weight.data.copy_(pre_train[keys[10]])
        self.conv3.conv3_3.weight.data.copy_(pre_train[keys[12]])
        self.conv4.conv4_1.weight.data.copy_(pre_train[keys[14]])
        self.conv4.conv4_2.weight.data.copy_(pre_train[keys[16]])
        self.conv4.conv4_3.weight.data.copy_(pre_train[keys[18]])
        self.conv5.conv5_1.weight.data.copy_(pre_train[keys[20]])
        self.conv5.conv5_2.weight.data.copy_(pre_train[keys[22]])
        self.conv5.conv5_3.weight.data.copy_(pre_train[keys[24]])

        self.conv1.conv1_1.bias.data.copy_(pre_train[keys[1]])
        self.conv1.conv1_2.bias.data.copy_(pre_train[keys[3]])
        self.conv2.conv2_1.bias.data.copy_(pre_train[keys[5]])
        self.conv2.conv2_2.bias.data.copy_(pre_train[keys[7]])
        self.conv3.conv3_1.bias.data.copy_(pre_train[keys[9]])
        self.conv3.conv3_2.bias.data.copy_(pre_train[keys[11]])
        self.conv3.conv3_3.bias.data.copy_(pre_train[keys[13]])
        self.conv4.conv4_1.bias.data.copy_(pre_train[keys[15]])
        self.conv4.conv4_2.bias.data.copy_(pre_train[keys[17]])
        self.conv4.conv4_3.bias.data.copy_(pre_train[keys[19]])
        self.conv5.conv5_1.bias.data.copy_(pre_train[keys[21]])
        self.conv5.conv5_2.bias.data.copy_(pre_train[keys[23]])
        self.conv5.conv5_3.bias.data.copy_(pre_train[keys[25]])

        self.classifier[0].weight.data.copy_(pre_train[keys[26]])
        self.classifier[0].bias.data.copy_(pre_train[keys[27]])
        self.classifier[3].weight.data.copy_(pre_train[keys[28]])
        self.classifier[3].bias.data.copy_(pre_train[keys[29]])
        self.classifier[6].weight.data.copy_(pre_train[keys[30]])
        self.classifier[6].bias.data.copy_(pre_train[keys[31]])
120
+ self.classifier[6].bias.data.copy_(pre_train[keys[31]])