Upload 30 files
Browse files- PNAS/PNASnet.py +140 -0
- PNAS/__init__.py +1 -0
- PNAS/genotypes.py +33 -0
- PNAS/operations.py +115 -0
- README.md +1 -12
- checkpoints/Readme.txt +2 -0
- dataloader_clean.py +52 -0
- generate_volumes.py +57 -0
- inference.ipynb +0 -0
- loss.py +256 -0
- model.py +322 -0
- requirements.txt +15 -0
- testing/.DS_Store +0 -0
- testing/gt/COCO_val2014_000000000192.png +0 -0
- testing/gt/COCO_val2014_000000000192_0.png +0 -0
- testing/gt/COCO_val2014_000000000192_1.png +0 -0
- testing/gt/COCO_val2014_000000000192_2.png +0 -0
- testing/gt/COCO_val2014_000000000192_3.png +0 -0
- testing/gt/COCO_val2014_000000000192_4.png +0 -0
- testing/gt/COCO_val2014_000000000208.png +0 -0
- testing/gt/COCO_val2014_000000000208_0.png +0 -0
- testing/gt/COCO_val2014_000000000208_1.png +0 -0
- testing/gt/COCO_val2014_000000000208_2.png +0 -0
- testing/gt/COCO_val2014_000000000208_3.png +0 -0
- testing/gt/COCO_val2014_000000000208_4.png +0 -0
- testing/images/COCO_val2014_000000000192.jpg +0 -0
- testing/images/COCO_val2014_000000000208.jpg +0 -0
- testing/predictions/Readme.txt +1 -0
- train.py +215 -0
- utils.py +254 -0
PNAS/PNASnet.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
from operations import *
|
| 4 |
+
from torch.autograd import Variable
|
| 5 |
+
# from utils import drop_path
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class Cell(nn.Module):
  """A single PNAS cell built from a fixed genotype.

  Each of `_steps` intermediate nodes sums the outputs of two ops applied
  to earlier states; the states listed in `concat` are concatenated
  channel-wise to form the cell output.
  """

  def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, reduction_prev):
    super(Cell, self).__init__()
    # NOTE(review): debug print left in — consider removing or using logging.
    print(C_prev_prev, C_prev, C)
    self.reduction = reduction

    # Preprocess s0 according to how the cell two steps back ended:
    #   None  -> s0 is the raw stem output; pass through unchanged
    #   True  -> the previous cell reduced, so halve s0's spatial size
    #   False -> only project channels with a 1x1 conv
    if reduction_prev is None:
      self.preprocess0 = Identity()
    elif reduction_prev is True:
      self.preprocess0 = FactorizedReduce(C_prev_prev, C)
    else:
      self.preprocess0 = ReLUConvBN(C_prev_prev, C, 1, 1, 0)
    self.preprocess1 = ReLUConvBN(C_prev, C, 1, 1, 0)

    if reduction:
      op_names, indices = zip(*genotype.reduce)
      concat = genotype.reduce_concat
    else:
      op_names, indices = zip(*genotype.normal)
      concat = genotype.normal_concat

    assert len(op_names) == len(indices)
    self._steps = len(op_names) // 2  # two ops feed each intermediate node
    self._concat = concat
    self.multiplier = len(concat)  # number of node outputs concatenated

    self._ops = nn.ModuleList()
    for name, index in zip(op_names, indices):
      # Only ops that read the two cell inputs stride in a reduction cell.
      stride = 2 if reduction and index < 2 else 1
      if reduction_prev is None and index == 0:
        # s0 was not channel-projected, so this op maps C_prev_prev -> C.
        op = OPS[name](C_prev_prev, C, stride, True)
      else:
        op = OPS[name](C, C, stride, True)
      self._ops += [op]
    self._indices = indices

  def forward(self, s0, s1, drop_prob):
    # drop_prob is accepted for API compatibility; drop-path is disabled below.
    s0 = self.preprocess0(s0)
    s1 = self.preprocess1(s1)

    states = [s0, s1]
    for i in range(self._steps):
      h1 = states[self._indices[2*i]]
      h2 = states[self._indices[2*i+1]]
      op1 = self._ops[2*i]
      op2 = self._ops[2*i+1]
      h1 = op1(h1)
      h2 = op2(h2)
      # if self.training and drop_prob > 0.:
      #   if not isinstance(op1, Identity):
      #     h1 = drop_path(h1, drop_prob)
      #   if not isinstance(op2, Identity):
      #     h2 = drop_path(h2, drop_prob)
      s = h1 + h2
      states += [s]
    # Concatenate the selected intermediate node outputs along channels.
    return torch.cat([states[i] for i in self._concat], dim=1)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class AuxiliaryHeadImageNet(nn.Module):
  """Auxiliary classifier head attached partway through the network.

  The original comment said "assuming input size 14x14"; the head only
  flattens to 768 features when the final 2x2 conv yields a 1x1 map, so
  the effective expected input is whatever reduces to that — verify
  against the attachment point in the backbone.
  """

  def __init__(self, C, num_classes):
    super(AuxiliaryHeadImageNet, self).__init__()
    stages = [
      nn.ReLU(inplace=True),
      nn.AvgPool2d(5, stride=2, padding=0, count_include_pad=False),
      nn.Conv2d(C, 128, 1, bias=False),
      nn.BatchNorm2d(128),
      nn.ReLU(inplace=True),
      nn.Conv2d(128, 768, 2, bias=False),
      nn.BatchNorm2d(768),
      nn.ReLU(inplace=True),
    ]
    self.features = nn.Sequential(*stages)
    self.classifier = nn.Linear(768, num_classes)

  def forward(self, x):
    feats = self.features(x)
    flattened = feats.view(feats.size(0), -1)
    return self.classifier(flattened)
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class NetworkImageNet(nn.Module):
  """PNASNet-style backbone assembled from genotype cells.

  A stride-2 stem conv plus two reduction stem cells precede `layers`
  cells; channels double (with a reduction cell) at 1/3 and 2/3 depth.
  forward() returns (logits, logits_aux); logits_aux is None unless the
  auxiliary head is enabled and the model is in training mode.
  """

  def __init__(self, C, num_classes, layers, auxiliary, genotype):
    super(NetworkImageNet, self).__init__()
    self._layers = layers
    self._auxiliary = auxiliary
    # BUG FIX: forward() reads self.drop_path_prob, but it was never
    # initialized anywhere in this class, so any forward pass crashed with
    # AttributeError unless the caller assigned it manually. Default to
    # 0.0 (drop-path disabled); training scripts may still override it.
    self.drop_path_prob = 0.0

    self.conv0 = nn.Conv2d(3, 96, kernel_size=3, stride=2, padding=0, bias=False)
    self.conv0_bn = nn.BatchNorm2d(96, eps=1e-3)
    # stem1 consumes the raw conv0 features directly (reduction_prev=None);
    # stem2 follows a reduction cell, so reduction_prev=True.
    self.stem1 = Cell(genotype, 96, 96, C // 4, True, None)
    self.stem2 = Cell(genotype, 96, C * self.stem1.multiplier // 4, C // 2, True, True)

    C_prev_prev, C_prev, C_curr = C * self.stem1.multiplier // 4, C * self.stem2.multiplier // 2, C

    self.cells = nn.ModuleList()
    reduction_prev = True
    for i in range(layers):
      if i in [layers // 3, 2 * layers // 3]:
        # Reduction cell: halve resolution, double channels.
        C_curr *= 2
        reduction = True
      else:
        reduction = False
      cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, reduction_prev)
      reduction_prev = reduction
      self.cells += [cell]
      C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
      if i == 2 * layers // 3:
        # Remember the channel count where the auxiliary head attaches.
        C_to_auxiliary = C_prev

    if auxiliary:
      self.auxiliary_head = AuxiliaryHeadImageNet(C_to_auxiliary, num_classes)
    self.relu = nn.ReLU(inplace=False)
    self.global_pooling = nn.AdaptiveAvgPool2d(1)
    self.classifier = nn.Linear(C_prev, num_classes)

  def forward(self, input):
    logits_aux = None
    s0 = self.conv0(input)
    s0 = self.conv0_bn(s0)
    s1 = self.stem1(s0, s0, self.drop_path_prob)
    s0, s1 = s1, self.stem2(s0, s1, self.drop_path_prob)
    for i, cell in enumerate(self.cells):
      s0, s1 = s1, cell(s0, s1, self.drop_path_prob)
      if i == 2 * self._layers // 3:
        # Auxiliary classifier contributes only during training.
        if self._auxiliary and self.training:
          logits_aux = self.auxiliary_head(s1)
    s1 = self.relu(s1)
    out = self.global_pooling(s1)
    logits = self.classifier(out.view(out.size(0), -1))
    return logits, logits_aux
|
| 140 |
+
|
PNAS/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
PNAS/genotypes.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import namedtuple

# An architecture description: lists of (op_name, input_state_index) pairs
# for the normal and reduction cells, plus which intermediate-state outputs
# are concatenated into each cell's output.
Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat')

# PNASNet cell. Note the normal and reduce specifications are identical —
# in PNAS the same cell topology is reused, and the reduction cell merely
# strides the ops that read the two cell inputs.
PNASNet = Genotype(
  normal = [
    ('sep_conv_5x5', 0),
    ('max_pool_3x3', 0),
    ('sep_conv_7x7', 1),
    ('max_pool_3x3', 1),
    ('sep_conv_5x5', 1),
    ('sep_conv_3x3', 1),
    ('sep_conv_3x3', 4),
    ('max_pool_3x3', 1),
    ('sep_conv_3x3', 0),
    ('skip_connect', 1),
  ],
  normal_concat = [2, 3, 4, 5, 6],
  reduce = [
    ('sep_conv_5x5', 0),
    ('max_pool_3x3', 0),
    ('sep_conv_7x7', 1),
    ('max_pool_3x3', 1),
    ('sep_conv_5x5', 1),
    ('sep_conv_3x3', 1),
    ('sep_conv_3x3', 4),
    ('max_pool_3x3', 1),
    ('sep_conv_3x3', 0),
    ('skip_connect', 1),
  ],
  reduce_concat = [2, 3, 4, 5, 6],
)
|
| 33 |
+
|
PNAS/operations.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
|
| 4 |
+
# Factory table mapping genotype op names to module constructors.
# Each factory has signature (C_in, C_out, stride, affine) -> nn.Module.
# Pooling ops append a 1x1 conv + BN only when a channel change is needed.
OPS = {
  'none' : lambda C_in, C_out, stride, affine: Zero(stride),
  'avg_pool_3x3' : lambda C_in, C_out, stride, affine: nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False) if C_in == C_out else nn.Sequential(
    nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False),
    nn.Conv2d(C_in, C_out, 1, stride=1, padding=0, bias=False),
    nn.BatchNorm2d(C_out, eps=1e-3, affine=affine)
  ),
  'max_pool_3x3' : lambda C_in, C_out, stride, affine: nn.MaxPool2d(3, stride=stride, padding=1) if C_in == C_out else nn.Sequential(
    nn.MaxPool2d(3, stride=stride, padding=1),
    nn.Conv2d(C_in, C_out, 1, stride=1, padding=0, bias=False),
    nn.BatchNorm2d(C_out, eps=1e-3, affine=affine)
  ),
  # Identity when no stride is needed; otherwise a strided 1x1 projection.
  'skip_connect' : lambda C_in, C_out, stride, affine: Identity() if stride == 1 else ReLUConvBN(C_in, C_out, 1, stride, 0, affine=affine),
  'sep_conv_3x3' : lambda C_in, C_out, stride, affine: SepConv(C_in, C_out, 3, stride, 1, affine=affine),
  'sep_conv_5x5' : lambda C_in, C_out, stride, affine: SepConv(C_in, C_out, 5, stride, 2, affine=affine),
  'sep_conv_7x7' : lambda C_in, C_out, stride, affine: SepConv(C_in, C_out, 7, stride, 3, affine=affine),
  'dil_conv_3x3' : lambda C_in, C_out, stride, affine: DilConv(C_in, C_out, 3, stride, 2, 2, affine=affine),
  'dil_conv_5x5' : lambda C_in, C_out, stride, affine: DilConv(C_in, C_out, 5, stride, 4, 2, affine=affine),
  # Factorized 7x7: a 1x7 conv followed by a 7x1 conv.
  'conv_7x1_1x7' : lambda C_in, C_out, stride, affine: nn.Sequential(
    nn.ReLU(inplace=False),
    nn.Conv2d(C_in, C_in, (1,7), stride=(1, stride), padding=(0, 3), bias=False),
    nn.Conv2d(C_in, C_out, (7,1), stride=(stride, 1), padding=(3, 0), bias=False),
    nn.BatchNorm2d(C_out, eps=1e-3, affine=affine)
  ),
}
|
| 29 |
+
|
| 30 |
+
class ReLUConvBN(nn.Module):
  """ReLU -> Conv2d -> BatchNorm: the standard conv unit of this codebase."""

  def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
    super(ReLUConvBN, self).__init__()
    activation = nn.ReLU(inplace=False)
    conv = nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, bias=False)
    norm = nn.BatchNorm2d(C_out, eps=1e-3, affine=affine)
    self.op = nn.Sequential(activation, conv, norm)

  def forward(self, x):
    """Run the ReLU-Conv-BN pipeline on x."""
    return self.op(x)
|
| 42 |
+
|
| 43 |
+
class DilConv(nn.Module):
  """Dilated depthwise-separable conv: ReLU -> dilated depthwise -> 1x1 pointwise -> BN."""

  def __init__(self, C_in, C_out, kernel_size, stride, padding, dilation, affine=True):
    super(DilConv, self).__init__()
    depthwise = nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=C_in, bias=False)
    pointwise = nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False)
    self.op = nn.Sequential(
      nn.ReLU(inplace=False),
      depthwise,
      pointwise,
      nn.BatchNorm2d(C_out, eps=1e-3, affine=affine),
    )

  def forward(self, x):
    """Apply the dilated separable convolution to x."""
    return self.op(x)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
class SepConv(nn.Module):
  """Separable conv applied twice: (ReLU -> depthwise -> pointwise -> BN) x 2.

  Only the first depthwise conv carries the stride; the second pass keeps
  the spatial size and refines the C_out features.
  """

  def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
    super(SepConv, self).__init__()
    first_pass = [
      nn.ReLU(inplace=False),
      nn.Conv2d(C_in, C_in, kernel_size=kernel_size, stride=stride, padding=padding, groups=C_in, bias=False),
      nn.Conv2d(C_in, C_out, kernel_size=1, padding=0, bias=False),
      nn.BatchNorm2d(C_out, eps=1e-3, affine=affine),
    ]
    second_pass = [
      nn.ReLU(inplace=False),
      nn.Conv2d(C_out, C_out, kernel_size=kernel_size, stride=1, padding=padding, groups=C_out, bias=False),
      nn.Conv2d(C_out, C_out, kernel_size=1, padding=0, bias=False),
      nn.BatchNorm2d(C_out, eps=1e-3, affine=affine),
    ]
    self.op = nn.Sequential(*(first_pass + second_pass))

  def forward(self, x):
    """Apply the doubled separable convolution to x."""
    return self.op(x)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class Identity(nn.Module):
  """No-op module: forwards its input unchanged (used for skip connections)."""

  def __init__(self):
    super(Identity, self).__init__()

  def forward(self, x):
    """Return x untouched."""
    return x
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
class Zero(nn.Module):
  """Emit a zero tensor shaped like the (optionally strided) input.

  With stride > 1 the output mimics the spatial shape a strided op would
  produce, so 'none' edges stay shape-compatible with their siblings.
  """

  def __init__(self, stride):
    super(Zero, self).__init__()
    self.stride = stride

  def forward(self, x):
    if self.stride == 1:
      return x.mul(0.)
    subsampled = x[:, :, ::self.stride, ::self.stride]
    return subsampled.mul(0.)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
class FactorizedReduce(nn.Module):
  """Halve spatial size while keeping all information.

  Two stride-2 1x1 convs sample the feature map on interleaved grids (the
  second on a one-pixel-shifted view); their outputs are concatenated so
  every input pixel contributes to the reduced map.
  """

  def __init__(self, C_in, C_out, affine=True):
    super(FactorizedReduce, self).__init__()
    assert C_out % 2 == 0
    self.relu = nn.ReLU(inplace=False)
    self.conv_1 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
    self.conv_2 = nn.Conv2d(C_in, C_out // 2, 1, stride=2, padding=0, bias=False)
    self.bn = nn.BatchNorm2d(C_out, eps=1e-3, affine=affine)
    self.pad = nn.ConstantPad2d((0, 1, 0, 1), 0)

  def forward(self, x):
    activated = self.relu(x)
    # Pad bottom/right by one, then drop the first row/column: this shifts
    # the grid so conv_2 samples the pixels conv_1 skips.
    shifted = self.pad(activated)[:, :, 1:, 1:]
    halves = [self.conv_1(activated), self.conv_2(shifted)]
    return self.bn(torch.cat(halves, dim=1))
|
| 115 |
+
|
README.md
CHANGED
|
@@ -1,12 +1 @@
|
|
| 1 |
-
|
| 2 |
-
title: Tempsal
|
| 3 |
-
emoji: ⚡
|
| 4 |
-
colorFrom: gray
|
| 5 |
-
colorTo: red
|
| 6 |
-
sdk: gradio
|
| 7 |
-
sdk_version: 4.44.1
|
| 8 |
-
app_file: app.py
|
| 9 |
-
pinned: false
|
| 10 |
-
---
|
| 11 |
-
|
| 12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
+
Download the model checkpoint from: https://drive.google.com/drive/folders/1W92oXYra_OPYkR1W56D80iDexWIR7f7Z?usp=sharing. Then follow the instructions in inference.ipynb; the notebook produces both temporal and image saliency predictions.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
checkpoints/Readme.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Please download the checkpoint from the following link:
|
| 2 |
+
https://drive.google.com/drive/folders/1W92oXYra_OPYkR1W56D80iDexWIR7f7Z?usp=sharing
|
dataloader_clean.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from torchvision import transforms
|
| 2 |
+
import torchvision.transforms.functional as TF
|
| 3 |
+
from PIL import Image
|
| 4 |
+
from torch.utils.data import DataLoader
|
| 5 |
+
import numpy as np
|
| 6 |
+
import torch
|
| 7 |
+
import os, cv2
|
| 8 |
+
from utils import *
|
| 9 |
+
import json
|
| 10 |
+
import random
|
| 11 |
+
from pycocotools.coco import COCO
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class SaliconDataset(DataLoader):
    """SALICON-style dataset yielding (image, saliency map, fixation map).

    Images are resized to 256x256 and normalized; ground-truth maps are
    scaled to [0, 1]; fixation maps are binarized at 0.5.
    """
    # NOTE(review): this subclasses torch.utils.data.DataLoader but only
    # implements the Dataset protocol (__getitem__/__len__);
    # torch.utils.data.Dataset is the conventional base — confirm callers
    # before changing.

    def __init__(self, img_dir, gt_dir, fix_dir, img_ids, exten='.png'):
        # img_dir: directory of input .jpg images
        # gt_dir:  directory of continuous saliency ground-truth maps
        # fix_dir: directory of binary fixation maps
        # img_ids: list of file stems (no extension)
        # exten:   extension of ground-truth / fixation files
        self.img_dir = img_dir
        self.gt_dir = gt_dir
        self.fix_dir = fix_dir
        self.img_ids = img_ids
        self.exten = exten
        self.img_transform = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5],
                                 [0.5, 0.5, 0.5])
        ])

    def __getitem__(self, idx):
        """Load and preprocess the sample at index idx."""
        img_id = self.img_ids[idx]
        img_path = os.path.join(self.img_dir, img_id + '.jpg')
        gt_path = os.path.join(self.gt_dir, img_id + self.exten)
        fix_path = os.path.join(self.fix_dir, img_id + self.exten)

        img = Image.open(img_path).convert('RGB')
        img = self.img_transform(img)

        # Ground truth: grayscale, resized, rescaled to [0, 1] when stored
        # as 8-bit values.
        gt = np.array(Image.open(gt_path).convert('L'))
        gt = gt.astype('float')
        gt = cv2.resize(gt, (256,256))
        if np.max(gt) > 1.0:
            gt = gt / 255.0

        # Binarize the fixation map (any nonzero pixel counts as a fixation).
        fixations = np.array(Image.open(fix_path).convert('L'))
        fixations = fixations.astype('float')
        fixations = (fixations > 0.5).astype('float')

        assert np.min(gt)>=0.0 and np.max(gt)<=1.0
        # NOTE(review): this assert fails for images with zero fixations —
        # confirm every fixation map contains at least one fixation.
        assert np.min(fixations)==0.0 and np.max(fixations)==1.0
        return img, torch.FloatTensor(gt), torch.FloatTensor(fixations)

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.img_ids)
|
generate_volumes.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from utils import *
|
| 2 |
+
from operator import itemgetter
|
| 3 |
+
from itertools import groupby
|
| 4 |
+
import cv2
|
| 5 |
+
import argparse
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
parser = argparse.ArgumentParser()
parser.add_argument('--time_slices', default=5, type=int)

def generate_fixation_files(path, time_slices):
    """Build per-time-slice fixation maps and saliency volumes for a split.

    path: split subdirectory, e.g. 'train/' or 'val/' (trailing slash
    expected — it is concatenated directly onto directory paths).
    time_slices: number of temporal bins to spread fixations over.
    Globals FIXATION_PATH, SAL_VOL_PATH, FIX_VOL_PATH, TIMESPAN, H, W and
    helpers parse_fixations/get_saliency_volume/GaussianBlur2D come from
    `utils` (star import) — presumably constants of the SALICON setup.
    """
    print('Parsing fixations of ' + path + '...')
    filenames = [nm.split(".")[0] for nm in os.listdir(FIXATION_PATH + path)]

    def create_dirs(dir_path):
        # Create <dir_path>/<path> (both levels) and return it.
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

        dir_path = dir_path + '/' + path
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)

        return dir_path

    sal_vol_path = create_dirs(SAL_VOL_PATH + str(time_slices))
    fix_vol_path = create_dirs(FIX_VOL_PATH + str(time_slices))

    # Gaussian blur module used to smooth fixations into saliency maps.
    # NOTE(review): requires a CUDA device.
    conv2D = GaussianBlur2D().cuda()

    print('Generating saliency volumes of ' + path + '...')
    for filename in tqdm(filenames):
        fixation_volume = parse_fixations([filename], FIXATION_PATH + path, progress_bar=False)[0]
        # Flatten to (timestamp, (x, y)) pairs ordered by time.
        fix_timestamps = sorted([fixation for fix_timestamps in fixation_volume
                                 for fixation in fix_timestamps], key=lambda x: x[0])
        # Map each timestamp to its temporal bin index in [0, time_slices).
        fix_timestamps = np.array([(min(int(ts * time_slices / TIMESPAN), time_slices-1), (x, y)) for (ts, (x, y)) in fix_timestamps])

        # Saving fixation map (fixation coords appear to be 1-based — TODO confirm).
        fix_vol = np.zeros(shape=(time_slices,H,W))
        for i, coords in fix_timestamps:
            fix_vol[i, coords[1] - 1, coords[0] - 1] = 1

        # Saving fixation list with timestamps (grouped per temporal bin;
        # groupby relies on fix_timestamps being sorted by bin index).
        compressed = np.array([(key, list(v[1] for v in valuesiter))
                               for key,valuesiter in groupby(fix_timestamps, key=itemgetter(0))])

        saliency_volume = get_saliency_volume(compressed, conv2D, time_slices)
        saliency_volume = saliency_volume.squeeze(0).squeeze(0).detach().cpu().numpy()

        # Write each temporal slice as an 8-bit PNG.
        for i, saliency_slice in enumerate(saliency_volume):
            cv2.imwrite(sal_vol_path + filename + '_' + str(i) + '.png', 255 * saliency_slice)
            cv2.imwrite(fix_vol_path + filename + '_' + str(i) + '.png', 255 * fix_vol[i])

# NOTE(review): runs at import time; consider an `if __name__ == '__main__':` guard.
args = parser.parse_args()
time_slices = args.time_slices
generate_fixation_files('train/', time_slices)
generate_fixation_files('val/', time_slices)
|
inference.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
loss.py
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import numpy as np
|
| 3 |
+
import cv2
|
| 4 |
+
|
| 5 |
+
def kldiv(s_map, gt):
	"""Mean KL divergence KL(gt || s_map) over a batch of (B, H, W) maps.

	Each map is first rescaled to sum to one per sample, so inputs need not
	be pre-normalized.
	"""
	batch_size, dim1, dim2 = s_map.size(0), s_map.size(1), s_map.size(2)

	pred_totals = torch.sum(s_map.view(batch_size, -1), 1)
	pred_denom = pred_totals.view(batch_size, 1, 1).expand(batch_size, dim1, dim2)
	assert pred_denom.size() == s_map.size()

	gt_totals = torch.sum(gt.view(batch_size, -1), 1)
	gt_denom = gt_totals.view(batch_size, 1, 1).expand(batch_size, dim1, dim2)
	assert gt_denom.size() == gt.size()

	pred = (s_map / (pred_denom * 1.0)).view(batch_size, -1)
	target = (gt / (gt_denom * 1.0)).view(batch_size, -1)

	eps = 2.2204e-16  # MATLAB eps, kept for parity with the reference code
	pointwise = target * torch.log(eps + target / (pred + eps))
	return torch.mean(torch.sum(pointwise, 1))
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def normalize_map(s_map):
	"""Min-max normalize each map in the batch to [0, 1] (MIT benchmark style)."""
	batch_size = s_map.size(0)
	rows, cols = s_map.size(1), s_map.size(2)

	flat = s_map.view(batch_size, -1)
	per_min = torch.min(flat, 1)[0].view(batch_size, 1, 1).expand(batch_size, rows, cols)
	per_max = torch.max(flat, 1)[0].view(batch_size, 1, 1).expand(batch_size, rows, cols)

	return (s_map - per_min) / (per_max - per_min * 1.0)
|
| 43 |
+
|
| 44 |
+
def similarity(s_map, gt):
	"""SIM (histogram intersection) between prediction and ground truth.

	Size of Image - WxH or 1xWxH; gt is the ground-truth saliency map.
	Both maps are min-max normalized, rescaled to probability
	distributions, and the summed elementwise minimum is averaged over the
	batch.
	"""
	batch_size = s_map.size(0)
	rows, cols = s_map.size(1), s_map.size(2)

	s_map = normalize_map(s_map)
	gt = normalize_map(gt)

	pred_totals = torch.sum(s_map.view(batch_size, -1), 1)
	pred_denom = pred_totals.view(batch_size, 1, 1).expand(batch_size, rows, cols)
	assert pred_denom.size() == s_map.size()

	gt_totals = torch.sum(gt.view(batch_size, -1), 1)
	gt_denom = gt_totals.view(batch_size, 1, 1).expand(batch_size, rows, cols)

	pred = (s_map / (pred_denom * 1.0)).view(batch_size, -1)
	target = (gt / (gt_denom * 1.0)).view(batch_size, -1)
	return torch.mean(torch.sum(torch.min(pred, target), 1))
|
| 70 |
+
|
| 71 |
+
def cc(s_map, gt):
	"""Pearson correlation coefficient between the two map batches."""
	batch_size = s_map.size(0)
	rows, cols = s_map.size(1), s_map.size(2)

	def _standardize(t):
		# Zero-mean, unit-variance per sample.
		flat = t.view(batch_size, -1)
		mean = torch.mean(flat, 1).view(batch_size, 1, 1).expand(batch_size, rows, cols)
		std = torch.std(flat, 1).view(batch_size, 1, 1).expand(batch_size, rows, cols)
		return (t - mean) / std

	a = _standardize(s_map)
	b = _standardize(gt)

	ab = torch.sum((a * b).view(batch_size, -1), 1)
	aa = torch.sum((a * a).view(batch_size, -1), 1)
	bb = torch.sum((b * b).view(batch_size, -1), 1)

	return torch.mean(ab / (torch.sqrt(aa * bb)))
|
| 90 |
+
|
| 91 |
+
def nss(s_map, gt):
	"""Normalized Scanpath Saliency: mean standardized saliency at fixations.

	gt is a binary fixation map. On a size mismatch the prediction is
	resized via cv2 and both tensors are moved to the GPU, matching the
	rest of the pipeline.
	"""
	if s_map.size() != gt.size():
		s_map = s_map.cpu().detach().numpy()
		s_map = torch.FloatTensor([cv2.resize(pred_slice, (gt.size(2), gt.size(1))) for pred_slice in s_map])
		s_map = s_map.cuda()
		gt = gt.cuda()

	assert s_map.size()==gt.size()
	batch_size = s_map.size(0)
	rows, cols = s_map.size(1), s_map.size(2)

	flat = s_map.view(batch_size, -1)
	mean = torch.mean(flat, 1).view(batch_size, 1, 1).expand(batch_size, rows, cols)
	std = torch.std(flat, 1).view(batch_size, 1, 1).expand(batch_size, rows, cols)

	eps = 2.2204e-16
	standardized = (s_map - mean) / (std + eps)

	fixation_scores = torch.sum((standardized * gt).view(batch_size, -1), 1)
	fixation_counts = torch.sum(gt.view(batch_size, -1), 1)
	return torch.mean(fixation_scores / fixation_counts)
|
| 111 |
+
|
| 112 |
+
def auc_judd(saliencyMap, fixationMap, jitter=True, normalize=False):
	"""AUC-Judd saliency metric.

	saliencyMap: predicted saliency map (torch tensor, optionally 1xHxW).
	fixationMap: binary human fixation map (same layout).
	jitter: add a tiny random constant to all locations so the ROC can be
	    computed robustly when the map has large uniform regions.
	normalize: min-max normalize the saliency map first.
	Returns the ROC area, or NaN when there are no fixations / the map is
	all-NaN.
	"""
	# Resize the prediction to the fixation map's resolution if needed.
	if saliencyMap.size() != fixationMap.size():
		saliencyMap = saliencyMap.cpu().squeeze(0).numpy()
		saliencyMap = torch.FloatTensor(cv2.resize(saliencyMap, (fixationMap.size(2), fixationMap.size(1)))).unsqueeze(0)
	if len(saliencyMap.size())==3:
		saliencyMap = saliencyMap[0,:,:]
		fixationMap = fixationMap[0,:,:]
	saliencyMap = saliencyMap.numpy()
	fixationMap = fixationMap.numpy()
	if normalize:
		# BUG FIX: the previous code called normalize_map (which expects a
		# batched torch tensor) on a numpy array and raised a TypeError.
		saliencyMap = (saliencyMap - saliencyMap.min()) / (saliencyMap.max() - saliencyMap.min())

	# If there are no fixations to predict, return NaN.
	if not fixationMap.any():
		print('Error: no fixationMap')
		score = float('nan')
		return score

	# Make the saliencyMap the size of fixationMap if a mismatch survived
	# the tensor-level resize above. BUG FIX: scipy.misc.imresize was
	# removed in SciPy >= 1.3; use cv2 (already imported in this module).
	if not np.shape(saliencyMap) == np.shape(fixationMap):
		saliencyMap = cv2.resize(saliencyMap, (np.shape(fixationMap)[1], np.shape(fixationMap)[0]))

	# Jitter saliency maps that come from models with many tied values.
	# If the map is Gaussian-smooth, jitter is unnecessary (and in fact
	# breaks the ordering of the smallest values).
	if jitter:
		saliencyMap = saliencyMap + np.random.random(np.shape(saliencyMap)) / 10 ** 7

	# Normalize the saliency map to [0, 1].
	saliencyMap = (saliencyMap - saliencyMap.min()) \
	              / (saliencyMap.max() - saliencyMap.min())

	if np.isnan(saliencyMap).all():
		print('NaN saliencyMap')
		score = float('nan')
		return score

	S = saliencyMap.flatten()
	F = fixationMap.flatten()

	Sth = S[F > 0]  # saliency values at fixation locations
	Nfixations = len(Sth)
	Npixels = len(S)

	allthreshes = sorted(Sth, reverse=True)  # sweep thresholds high -> low
	tp = np.zeros((Nfixations + 2))
	fp = np.zeros((Nfixations + 2))
	tp[0], tp[-1] = 0, 1
	fp[0], fp[-1] = 0, 1

	for i in range(Nfixations):
		thresh = allthreshes[i]
		aboveth = (S >= thresh).sum()  # saliency values above threshold
		# Ratio of fixated locations above threshold.
		tp[i + 1] = float(i + 1) / Nfixations
		# Ratio of non-fixated locations above threshold.
		fp[i + 1] = float(aboveth - i) / (Npixels - Nfixations)

	score = np.trapz(tp, x=fp)
	return score
|
| 186 |
+
|
| 187 |
+
def auc_shuff(s_map,gt,other_map,splits=100,stepsize=0.1):
	"""Shuffled AUC: false positives are sampled from fixations of OTHER images.

	s_map: predicted saliency map (torch tensor, optionally 1xHxW).
	gt: binary fixation map for this image.
	other_map: binary fixation map aggregated from other images, providing
	    the shuffled negative set.
	splits: number of random permutations of the negative fixations.
	stepsize: unused; kept for signature compatibility.
	Returns the mean AUC over all splits.
	"""
	if len(s_map.size())==3:
		s_map = s_map[0,:,:]
		gt = gt[0,:,:]
		other_map = other_map[0,:,:]

	s_map = s_map.numpy()
	# BUG FIX: the previous code called normalize_map (which expects a
	# batched torch tensor with .size()/.view) on this numpy array, which
	# raised a TypeError at runtime. Min-max normalize with numpy instead.
	s_map = (s_map - s_map.min()) / (s_map.max() - s_map.min())
	gt = gt.numpy()
	other_map = other_map.numpy()

	num_fixations = np.sum(gt)

	# Flatten the other-image fixation coordinates to linear indices.
	x,y = np.where(other_map==1)
	other_map_fixs = []
	for j in zip(x,y):
		other_map_fixs.append(j[0]*other_map.shape[0] + j[1])
	ind = len(other_map_fixs)
	assert ind==np.sum(other_map), 'something is wrong in auc shuffle'

	num_fixations_other = min(ind,num_fixations)

	num_pixels = s_map.shape[0]*s_map.shape[1]
	# Each split is a random permutation of the negative fixation indices.
	random_numbers = []
	for i in range(0,splits):
		temp_list = []
		t1 = np.random.permutation(ind)
		for k in t1:
			temp_list.append(other_map_fixs[k])
		random_numbers.append(temp_list)

	aucs = []
	# For each split, calculate the AUC.
	for i in random_numbers:
		r_sal_map = []
		for k in i:
			r_sal_map.append(s_map[k%s_map.shape[0]-1, int(k/s_map.shape[0])])
		# In these values, we need to find thresholds and calculate the AUC.
		thresholds = [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]

		r_sal_map = np.array(r_sal_map)

		thresholds = sorted(set(thresholds))
		area = []
		area.append((0.0,0.0))
		for thresh in thresholds:
			# Keep only pixels with saliency above the threshold.
			temp = np.zeros(s_map.shape)
			temp[s_map>=thresh] = 1.0
			num_overlap = np.where(np.add(temp,gt)==2)[0].shape[0]
			tp = num_overlap/(num_fixations*1.0)

			# False positives: negative-set saliency values above threshold,
			# divided by the number of (positive) fixations.
			fp = len(np.where(r_sal_map>thresh)[0])/(num_fixations*1.0)

			area.append((round(tp,4),round(fp,4)))

		area.append((1.0,1.0))
		area.sort(key = lambda x:x[0])
		tp_list = [x[0] for x in area]
		fp_list = [x[1] for x in area]

		aucs.append(np.trapz(np.array(tp_list),np.array(fp_list)))

	return np.mean(aucs)
|
model.py
ADDED
|
@@ -0,0 +1,322 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torchvision.models as models
|
| 2 |
+
import torch
|
| 3 |
+
import torch.nn as nn
|
| 4 |
+
from collections import OrderedDict
|
| 5 |
+
import sys
|
| 6 |
+
from einops import rearrange, repeat
|
| 7 |
+
from einops.layers.torch import Rearrange
|
| 8 |
+
from scipy import ndimage
|
| 9 |
+
|
| 10 |
+
sys.path.append('./PNAS/')
|
| 11 |
+
from PNASnet import *
|
| 12 |
+
from genotypes import PNASNet
|
| 13 |
+
import torch.nn.functional as nnf
|
| 14 |
+
import numpy as np
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class PNASModel(nn.Module):
    """PNASNet-5-Large encoder + U-Net-style convolutional decoder that
    predicts a single saliency map per image.

    forward() returns a (B, H, W) tensor in [0, 1] (final Sigmoid, channel
    dim squeezed).
    """

    def __init__(self, num_channels=3, train_enc=False, load_weight=1):
        """
        Args:
            num_channels: kept for interface compatibility (input is assumed
                3-channel RGB by the PNAS stem — TODO confirm).
            train_enc: if True, encoder parameters stay trainable.
            load_weight: if truthy, load the ImageNet-pretrained PNAS weights.
        """
        super(PNASModel, self).__init__()

        # Fix: `self.path` was read below but never assigned, so
        # load_weight=1 raised AttributeError. Point it at the pretrained
        # PNASNet-5-Large checkpoint used by this repo.
        self.path = './PNAS/PNASNet-5_Large.pth'

        self.pnas = NetworkImageNet(216, 1001, 12, False, PNASNet)
        if load_weight:
            self.pnas.load_state_dict(torch.load(self.path))

        # Freeze (or unfreeze) the whole encoder in one pass.
        for param in self.pnas.parameters():
            param.requires_grad = train_enc

        # Pad right/bottom by one pixel so the stem feature map lines up
        # with the decoder's upsampled resolution.
        self.padding = nn.ConstantPad2d((0, 1, 0, 1), 0)
        self.drop_path_prob = 0

        self.linear_upsampling = nn.UpsamplingBilinear2d(scale_factor=2)

        # Decoder stages: conv + ReLU + 2x bilinear upsample, each consuming
        # an encoder skip connection concatenated along channels.
        self.deconv_layer0 = nn.Sequential(
            nn.Conv2d(in_channels=4320, out_channels=512, kernel_size=3, padding=1, bias=True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )
        self.deconv_layer1 = nn.Sequential(
            nn.Conv2d(in_channels=512 + 2160, out_channels=256, kernel_size=3, padding=1, bias=True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )
        self.deconv_layer2 = nn.Sequential(
            nn.Conv2d(in_channels=1080 + 256, out_channels=270, kernel_size=3, padding=1, bias=True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )
        self.deconv_layer3 = nn.Sequential(
            nn.Conv2d(in_channels=540, out_channels=96, kernel_size=3, padding=1, bias=True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )
        self.deconv_layer4 = nn.Sequential(
            nn.Conv2d(in_channels=192, out_channels=128, kernel_size=3, padding=1, bias=True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )
        # Final head: 1-channel map squashed to [0, 1].
        self.deconv_layer5 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1, bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=128, out_channels=1, kernel_size=3, padding=1, bias=True),
            nn.Sigmoid()
        )

    def forward(self, images):
        """Encode `images`, then decode with skip connections to a saliency map."""
        batch_size = images.size(0)

        # --- Encoder: collect intermediate features as skip connections. ---
        s0 = self.pnas.conv0(images)
        s0 = self.pnas.conv0_bn(s0)
        out1 = self.padding(s0)

        s1 = self.pnas.stem1(s0, s0, self.drop_path_prob)
        out2 = s1
        s0, s1 = s1, self.pnas.stem2(s0, s1, 0)

        # Tap cells 3 / 7 / 11 (end of each reduction stage).
        for i, cell in enumerate(self.pnas.cells):
            s0, s1 = s1, cell(s0, s1, 0)
            if i == 3:
                out3 = s1
            if i == 7:
                out4 = s1
            if i == 11:
                out5 = s1

        # --- Decoder: upsample, fusing one skip connection per stage. ---
        out5 = self.deconv_layer0(out5)

        x = torch.cat((out5, out4), 1)
        x = self.deconv_layer1(x)

        x = torch.cat((x, out3), 1)
        x = self.deconv_layer2(x)

        x = torch.cat((x, out2), 1)
        x = self.deconv_layer3(x)
        x = torch.cat((x, out1), 1)

        x = self.deconv_layer4(x)

        x = self.deconv_layer5(x)
        x = x.squeeze(1)  # (B, 1, H, W) -> (B, H, W)
        # print("PNAS pred actual pnas:", x.mean(),x.min(), x.max(), x.sum())

        return x
|
| 106 |
+
|
| 107 |
+
class PNASVolModellast(nn.Module):
    """PNASNet encoder + decoder predicting a temporal saliency *volume*
    (`time_slices` maps per image) instead of a single map.

    forward() returns (volume, [out1..out5]); the outs are the encoder skip
    features, reused downstream by PNASBoostedModelMultiLevel.
    """

    def __init__(self, time_slices, num_channels=3, train_enc=False, load_weight=1):
        """
        Args:
            time_slices: number of temporal slices (output channels of the head).
            num_channels: kept for interface compatibility.
            train_enc: if True, encoder parameters stay trainable.
            load_weight: if truthy, load pretrained PNAS weights from self.path.
        """
        super(PNASVolModellast, self).__init__()

        # Fix: `self.path` was read below but never assigned, so
        # load_weight=1 raised AttributeError.
        self.path = './PNAS/PNASNet-5_Large.pth'

        self.pnas = NetworkImageNet(216, 1001, 12, False, PNASNet)
        if load_weight:
            state_dict = torch.load(self.path)
            # Remap checkpoint keys so weights saved from wrapped models
            # ('module.'/'pnas.' prefixes) load into the bare encoder.
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                if 'module' in k:
                    k = 'module.pnas.' + k
                else:
                    k = k.replace('pnas.', '')
                new_state_dict[k] = v
            self.pnas.load_state_dict(new_state_dict, strict=False)

        for param in self.pnas.parameters():
            param.requires_grad = train_enc

        self.padding = nn.ConstantPad2d((0, 1, 0, 1), 0)
        self.drop_path_prob = 0

        self.linear_upsampling = nn.UpsamplingBilinear2d(scale_factor=2)

        # Decoder: same skip-connected layout as PNASModel.
        self.deconv_layer0 = nn.Sequential(
            nn.Conv2d(in_channels=4320, out_channels=512, kernel_size=3, padding=1, bias=True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )
        self.deconv_layer1 = nn.Sequential(
            nn.Conv2d(in_channels=512 + 2160, out_channels=256, kernel_size=3, padding=1, bias=True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )
        self.deconv_layer2 = nn.Sequential(
            nn.Conv2d(in_channels=1080 + 256, out_channels=270, kernel_size=3, padding=1, bias=True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )
        self.deconv_layer3 = nn.Sequential(
            nn.Conv2d(in_channels=540, out_channels=96, kernel_size=3, padding=1, bias=True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )
        self.deconv_layer4 = nn.Sequential(
            nn.Conv2d(in_channels=192, out_channels=128, kernel_size=3, padding=1, bias=True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )
        # Head: one output channel per temporal slice.
        self.deconv_layer5 = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=64, kernel_size=3, padding=1, bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, padding=1, bias=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=32, out_channels=time_slices, kernel_size=3, padding=1, bias=True),
            nn.Sigmoid()
        )

    def forward(self, images):
        """Return (saliency volume, list of encoder skip features)."""
        # --- Encoder with skip taps (same scheme as PNASModel). ---
        s0 = self.pnas.conv0(images)
        s0 = self.pnas.conv0_bn(s0)
        out1 = self.padding(s0)

        s1 = self.pnas.stem1(s0, s0, self.drop_path_prob)
        out2 = s1
        s0, s1 = s1, self.pnas.stem2(s0, s1, 0)

        for i, cell in enumerate(self.pnas.cells):
            s0, s1 = s1, cell(s0, s1, 0)
            if i == 3:
                out3 = s1
            if i == 7:
                out4 = s1
            if i == 11:
                out5 = s1

        # --- Decoder. ---
        out5 = self.deconv_layer0(out5)

        x = torch.cat((out5, out4), 1)
        x = self.deconv_layer1(x)

        x = torch.cat((x, out3), 1)
        x = self.deconv_layer2(x)

        x = torch.cat((x, out2), 1)
        x = self.deconv_layer3(x)
        x = torch.cat((x, out1), 1)

        x = self.deconv_layer4(x)

        x = self.deconv_layer5(x)
        # Normalize the whole batch volume so its peak equals 1
        # (Sigmoid output is strictly positive, so max() > 0).
        x = x / x.max()

        return x, [out1, out2, out3, out4, out5]
|
| 205 |
+
|
| 206 |
+
|
| 207 |
+
class PNASBoostedModelMultiLevel(nn.Module):
    # Fuses a frozen single-map saliency model (pnas_sal) and a frozen
    # temporal-volume model (pnas_vol): their 1 + 5 = 6 prediction maps are
    # injected at every decoder scale and a small trainable decoder mixes them.
    #
    # NOTE(review): `model_vol_path` is accepted but never used — both
    # sub-models are restored from the combined checkpoint at `model_path`.
    # Confirm whether the parameter can be dropped or should be honored.

    def __init__(self, device, model_path, model_vol_path, time_slices, train_model=False, selected_slices=""):
        super(PNASBoostedModelMultiLevel, self).__init__()

        # Stored but not read inside this class.
        self.selected_slices = selected_slices

        self.linear_upsampling = nn.UpsamplingBilinear2d(scale_factor=2)

        # Each decoder stage consumes the previous features + an encoder skip
        # + 6 resized guidance maps (1 saliency + 5 volume slices).
        self.deconv_layer1 = nn.Sequential(
            nn.Conv2d(in_channels = 512+2160+6, out_channels = 256, kernel_size = 3, padding = 1, bias = True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )
        self.deconv_layer2 = nn.Sequential(
            nn.Conv2d(in_channels = 1080+256+6, out_channels = 270, kernel_size = 3, padding = 1, bias = True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )
        self.deconv_layer3 = nn.Sequential(
            nn.Conv2d(in_channels = 540+6, out_channels = 96, kernel_size = 3, padding = 1, bias = True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )
        self.deconv_layer4 = nn.Sequential(
            nn.Conv2d(in_channels = 192+6, out_channels = 128, kernel_size = 3, padding = 1, bias = True),
            nn.ReLU(inplace=True),
            self.linear_upsampling
        )

        # Final fusion head: features + the 6 full-resolution guidance maps.
        self.deconv_mix = nn.Sequential(
            nn.Conv2d(in_channels = 128+6 , out_channels = 16, kernel_size = 3, padding = 1, bias = True),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = 3, padding = 1, bias = True),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels = 32, out_channels = 1, kernel_size = 3, padding = 1, bias = True),
            nn.Sigmoid()
        )
        # Restore the frozen volume sub-model from the combined checkpoint.
        model_vol = PNASVolModellast(time_slices=5, load_weight=0) #change this to time slices
        model_vol = nn.DataParallel(model_vol).cuda()
        state_dict = torch.load(model_path)
        vol_state_dict = OrderedDict()
        sal_state_dict = OrderedDict()
        smm_state_dict = OrderedDict()

        # Split the combined checkpoint into volume / saliency / fusion parts
        # by key prefix; prefixes are stripped so the bare modules can load them.
        for k, v in state_dict.items():
            if 'pnas_vol' in k:

                k = k.replace('pnas_vol.module.', '')
                vol_state_dict[k] = v
            elif 'pnas_sal' in k:
                k = k.replace('pnas_sal.module.', '')
                sal_state_dict[k] = v
            else:
                smm_state_dict[k] = v

        self.load_state_dict(smm_state_dict)
        model_vol.load_state_dict(vol_state_dict)
        # NOTE(review): model_vol is already DataParallel; wrapping it again
        # nests DataParallel modules — presumably kept so saved checkpoint
        # keys line up. Verify before refactoring.
        self.pnas_vol = nn.DataParallel(model_vol).cuda()

        for param in self.pnas_vol.parameters():
            param.requires_grad = False

        # Restore the frozen single-map saliency sub-model.
        model = PNASModel(load_weight=0)
        model = nn.DataParallel(model).cuda()

        model.load_state_dict(sal_state_dict, strict=True)
        self.pnas_sal = nn.DataParallel(model).to(device)

        for param in self.pnas_sal.parameters():
            param.requires_grad = False #train_model

    def forward(self, images):
        # print("IMAGES", images.shape)

        # Frozen sub-model predictions: (B, 1, H, W) and (B, 5, H, W) —
        # shapes assumed from the sub-model heads, TODO confirm.
        pnas_pred = self.pnas_sal(images).unsqueeze(1)
        pnas_vol_pred , outs = self.pnas_vol(images)

        out1 , out2, out3, out4, out5 = outs
        #print(pnas_vol_pred.shape)
        # 6 guidance maps, re-injected (bicubically resized) at every scale.
        x_maps = torch.cat((pnas_pred, pnas_vol_pred), 1)

        x = torch.cat((out5,out4), 1)
        x_maps16 = nnf.interpolate(x_maps, size=(16, 16), mode='bicubic', align_corners=False)

        x = torch.cat((x,x_maps16), 1)

        x = self.deconv_layer1(x)
        x = torch.cat((x,out3), 1)
        x_maps32 = nnf.interpolate(x_maps, size=(32, 32), mode='bicubic', align_corners=False)
        x = torch.cat((x,x_maps32), 1)

        x = self.deconv_layer2(x)
        x = torch.cat((x,out2), 1)
        x_maps64 = nnf.interpolate(x_maps, size=(64, 64), mode='bicubic', align_corners=False)
        x = torch.cat((x,x_maps64), 1)

        x = self.deconv_layer3(x)
        x = torch.cat((x,out1), 1)
        x_maps128 = nnf.interpolate(x_maps, size=(128, 128), mode='bicubic', align_corners=False)

        x = torch.cat((x,x_maps128), 1)

        x = self.deconv_layer4(x)
        x = torch.cat((x,x_maps), 1)

        x = self.deconv_mix(x)

        x = x.squeeze(1)

        return x, pnas_vol_pred
|
| 322 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
wandb
|
| 2 |
+
pycocotools
|
| 3 |
+
torch==1.8.0+cu111
|
| 4 |
+
torchvision==0.9.0+cu111
|
| 5 |
+
torchaudio==0.8.0
|
| 6 |
+
# libgl1-mesa-glx is a system (apt) package, not a pip package — install it
# with `apt-get install libgl1-mesa-glx` (needed by OpenCV). Listing it here
# made `pip install -r requirements.txt` fail.
|
| 7 |
+
ftfy
|
| 8 |
+
regex
|
| 9 |
+
tqdm
|
| 10 |
+
ipywidgets
|
| 11 |
+
seaborn
|
| 12 |
+
einops
|
| 13 |
+
clip-anytorch
|
| 14 |
+
# pycocotools was listed twice; it already appears near the top of this file.
|
| 15 |
+
kornia==0.5.10
|
testing/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
testing/gt/COCO_val2014_000000000192.png
ADDED
|
testing/gt/COCO_val2014_000000000192_0.png
ADDED
|
testing/gt/COCO_val2014_000000000192_1.png
ADDED
|
testing/gt/COCO_val2014_000000000192_2.png
ADDED
|
testing/gt/COCO_val2014_000000000192_3.png
ADDED
|
testing/gt/COCO_val2014_000000000192_4.png
ADDED
|
testing/gt/COCO_val2014_000000000208.png
ADDED
|
testing/gt/COCO_val2014_000000000208_0.png
ADDED
|
testing/gt/COCO_val2014_000000000208_1.png
ADDED
|
testing/gt/COCO_val2014_000000000208_2.png
ADDED
|
testing/gt/COCO_val2014_000000000208_3.png
ADDED
|
testing/gt/COCO_val2014_000000000208_4.png
ADDED
|
testing/images/COCO_val2014_000000000192.jpg
ADDED
|
testing/images/COCO_val2014_000000000208.jpg
ADDED
|
testing/predictions/Readme.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Your predictions will appear in this folder after running the notebook.
|
train.py
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import os
|
| 3 |
+
import torch
|
| 4 |
+
import sys
|
| 5 |
+
import time
|
| 6 |
+
import wandb
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
+
from dataloader import SaliconDataset
|
| 10 |
+
from loss import *
|
| 11 |
+
from utils import AverageMeter
|
| 12 |
+
from utils import img_save
|
| 13 |
+
from torchvision import utils
|
| 14 |
+
import torch.nn.functional as nnf
|
| 15 |
+
from os.path import join
|
| 16 |
+
from PIL import Image
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# Command-line configuration for training.
# NOTE(review): `type=bool` on argparse flags is misleading — argparse calls
# bool() on the raw string, so `--kldiv False` evaluates to True (any
# non-empty string is truthy). The defaults below work, but overriding a
# bool flag from the CLI does not behave as expected; confirm intent.
parser = argparse.ArgumentParser()
# Training schedule / loss selection.
parser.add_argument('--no_epochs',default=30, type=int)
parser.add_argument('--lr',default=1e-5, type=float)
parser.add_argument('--kldiv',default=True, type=bool)
parser.add_argument('--cc',default=True, type=bool)
parser.add_argument('--nss',default=False, type=bool)
parser.add_argument('--sim',default=False, type=bool)
parser.add_argument('--nss_emlnet',default=False, type=bool)
parser.add_argument('--nss_norm',default=False, type=bool)
parser.add_argument('--l1',default=False, type=bool)
parser.add_argument('--lr_sched',default=False, type=bool)
parser.add_argument('--dilation',default=False, type=bool)
parser.add_argument('--enc_model',default="pnas", type=str)
parser.add_argument('--optim',default="Adam", type=str)

# Loss coefficients (negative for metrics that are maximized, e.g. CC/SIM/NSS).
parser.add_argument('--load_weight',default=1, type=int)
parser.add_argument('--kldiv_coeff',default=1.0, type=float)
parser.add_argument('--step_size',default=5, type=int)
parser.add_argument('--cc_coeff',default=-1.0, type=float)
parser.add_argument('--sim_coeff',default=-1.0, type=float)
parser.add_argument('--nss_coeff',default=-1.0, type=float)
parser.add_argument('--nss_emlnet_coeff',default=1.0, type=float)
parser.add_argument('--nss_norm_coeff',default=1.0, type=float)
parser.add_argument('--l1_coeff',default=1.0, type=float)
parser.add_argument('--train_enc',default=1, type=int)

# Data / runtime settings.
parser.add_argument('--dataset_dir',default="../data/", type=str)
parser.add_argument('--batch_size',default=32, type=int)
parser.add_argument('--log_interval',default=60, type=int)
parser.add_argument('--no_workers',default=4, type=int)
parser.add_argument('--train_model',default=False, type=bool)
parser.add_argument('--time_slices',default=5, type=int)
parser.add_argument('--selected_slices',default="", type=str)
parser.add_argument('--results_dir',default="", type=str )

# Path to save the model weights
parser.add_argument('--model_val_path',default="model.pt", type=str)
# If the model type is pnas_boosted, specify the path of the pre-trained pnas model here
parser.add_argument('--model_path',default="", type=str)
# If the model type is pnas_boosted, specify the path of the pre-trained pnasvol model here
parser.add_argument('--model_vol_path',default="", type=str)

args = parser.parse_args()
|
| 63 |
+
|
| 64 |
+
# Dataset layout: images/, maps/ (ground-truth saliency), fixation_maps/,
# each with train/ and val/ subfolders under --dataset_dir.
train_img_dir = args.dataset_dir + "images/train/"
train_gt_dir = args.dataset_dir + "maps/train/"
train_fix_dir = args.dataset_dir + "fixation_maps/train/"

val_img_dir = args.dataset_dir + "images/val/"
val_gt_dir = args.dataset_dir + "maps/val/"
val_fix_dir = args.dataset_dir + "fixation_maps/val/"

# Use the GPU when available; the rest of the script moves tensors to this device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
| 73 |
+
|
| 74 |
+
# Instantiate the requested architecture.
if args.enc_model == "pnas":
    print("PNAS Model")
    from model import PNASModel
    model = PNASModel(train_enc=bool(args.train_enc), load_weight=args.load_weight)

elif args.enc_model == "pnas_boosted_multi":
    print("PNAS Boosted Model PNASBoostedModelMultilevel")
    # Fix: model.py defines the class with a capital 'L'
    # (PNASBoostedModelMultiLevel); the previous import used 'Multilevel'
    # and raised ImportError on this branch.
    from model import PNASBoostedModelMultiLevel
    model = PNASBoostedModelMultiLevel(device, args.model_path, args.model_vol_path,
                                       args.time_slices, train_model=args.train_model,
                                       selected_slices=args.selected_slices)


# Wrap in DataParallel only when several GPUs are present.
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    model = nn.DataParallel(model)
model.to(device)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
# Image ids are filenames without extension; ground-truth maps and fixation
# maps are looked up by the same id inside SaliconDataset.
train_img_ids = [nm.split(".")[0] for nm in os.listdir(train_img_dir)]
val_img_ids = [nm.split(".")[0] for nm in os.listdir(val_img_dir)]

train_dataset = SaliconDataset(train_img_dir, train_gt_dir, train_fix_dir, train_img_ids)
val_dataset = SaliconDataset(val_img_dir, val_gt_dir, val_fix_dir, val_img_ids)

# Shuffle only the training split; validation order is fixed.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.no_workers)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.no_workers)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
def loss_func(pred_map, gt, fixations, args):
    """Weighted sum of the saliency losses enabled via CLI flags.

    Args:
        pred_map: predicted saliency map tensor.
        gt: ground-truth saliency map (same shape as pred_map).
        fixations: binary fixation map, used only when args.nss is set.
        args: parsed CLI namespace providing the loss flags and coefficients.

    Returns:
        A 1-element tensor on pred_map's device holding the combined loss.
    """
    # Fix: accumulate on the prediction's device instead of hard-coding
    # `.cuda()`, which crashed on CPU-only machines.
    loss = torch.zeros(1, device=pred_map.device, dtype=torch.float32)
    criterion = nn.L1Loss()
    if args.kldiv:
        loss += args.kldiv_coeff * kldiv(pred_map, gt)
    if args.cc:
        # cc/sim/nss are similarity metrics; negative coefficients turn
        # them into losses (see the CLI defaults).
        loss += args.cc_coeff * cc(pred_map, gt)
    if args.nss:
        loss += args.nss_coeff * nss(pred_map, fixations)
    if args.l1:
        loss += args.l1_coeff * criterion(pred_map, gt)
    if args.sim:
        loss += args.sim_coeff * similarity(pred_map, gt)
    #print("Loss: ", loss)
    return loss
|
| 117 |
+
|
| 118 |
+
def train(model, optimizer, loader, epoch, device, args):
    """Run one training epoch; return the mean loss over all batches.

    Logs a windowed average to stdout and wandb every args.log_interval batches.
    """
    model.train()

    start = time.time()

    running_total = 0.0   # loss summed over the whole epoch
    running_window = 0.0  # loss summed since the last log line

    for batch_idx, (img, gt, fixations) in enumerate(loader):
        img, gt, fixations = img.to(device), gt.to(device), fixations.to(device)

        optimizer.zero_grad()
        pred_map, vol_pred = model(img)

        assert pred_map.size() == gt.size()

        loss = loss_func(pred_map, gt, fixations, args)
        loss.backward()

        batch_loss = loss.item()
        running_total += batch_loss
        running_window += batch_loss

        optimizer.step()
        if batch_idx % args.log_interval == args.log_interval - 1:
            elapsed_min = (time.time() - start) / 60
            print('[{:2d}, {:5d}] avg_loss : {:.5f}, time:{:3f} minutes'.format(
                epoch, batch_idx, running_window / args.log_interval, elapsed_min))
            wandb.log({"loss": running_window / args.log_interval})
            running_window = 0.0
            sys.stdout.flush()

    print('[{:2d}, train] avg_loss : {:.5f}'.format(epoch, running_total / len(loader)))
    sys.stdout.flush()

    return running_total / len(loader)
|
| 153 |
+
|
| 154 |
+
def validate(model, loader, epoch, device, args):
    """Evaluate on the validation set.

    Returns (cc_avg, cc_meter, kldiv_meter, nss_meter, sim_meter).
    """
    model.eval()
    start = time.time()

    cc_meter = AverageMeter()
    kld_meter = AverageMeter()
    nss_meter = AverageMeter()
    sim_meter = AverageMeter()

    for img, gt, fixations in tqdm(loader):
        img, gt, fixations = img.to(device), gt.to(device), fixations.to(device)

        pred_map, vol_pred = model(img)

        cc_meter.update(cc(pred_map, gt))
        kld_meter.update(kldiv(pred_map, gt))
        nss_meter.update(nss(pred_map, fixations))
        sim_meter.update(similarity(pred_map, gt))

    elapsed_min = (time.time() - start) / 60
    print('[{:2d}, val] CC : {:.5f}, KLDIV : {:.5f}, NSS : {:.5f}, SIM : {:.5f} time:{:3f} minutes'.format(
        epoch, cc_meter.avg, kld_meter.avg, nss_meter.avg, sim_meter.avg, elapsed_min))
    wandb.log({"CC": cc_meter.avg, 'KLDIV': kld_meter.avg, 'NSS': nss_meter.avg, 'SIM': sim_meter.avg})
    sys.stdout.flush()

    return cc_meter.avg, cc_meter, kld_meter, nss_meter, sim_meter
|
| 179 |
+
|
| 180 |
+
# Optimize only parameters left trainable (frozen encoders are excluded).
params = list(filter(lambda p: p.requires_grad, model.parameters()))

# Optimizer choice from --optim; an unknown value leaves `optimizer`
# undefined and the training loop will raise NameError.
if args.optim=="Adam":
    optimizer = torch.optim.Adam(params, lr=args.lr)
if args.optim=="Adagrad":
    optimizer = torch.optim.Adagrad(params, lr=args.lr)
if args.optim=="SGD":
    optimizer = torch.optim.SGD(params, lr=args.lr, momentum=0.9)
# Optional step decay: lr *= 0.1 every --step_size epochs.
if args.lr_sched:
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=0.1)
|
| 190 |
+
|
| 191 |
+
print(device)
# Despite the name, `best_loss` tracks a score to MAXIMIZE:
# CC minus KLD (higher CC, lower KLD => larger value).
best_loss = 0
for epoch in range(0, args.no_epochs):
    loss = train(model, optimizer, train_loader, epoch, device, args)

    with torch.no_grad():
        cc_loss,cc_loss_obj,kldiv_loss,nss_loss,sim_loss = validate(model, val_loader, epoch, device, args)
        # Selection metric: CC average minus KLD average.
        cc_loss -=kldiv_loss.avg
        # Seed the best score with the first epoch's value.
        if epoch == 0 :
            best_loss = cc_loss
        # `<=` means the epoch-0 model is always saved at least once.
        if best_loss <= cc_loss:
            best_loss = cc_loss
            print('[{:2d}, save, {}]'.format(epoch, args.model_val_path))
            wandb.log({"Best/CC mean": cc_loss,"Best/CC median": cc_loss_obj.get_median(), "Best/CC std": cc_loss_obj.get_std(),
                "Best/KLD mean": kldiv_loss.avg,"Best/KLD median": kldiv_loss.get_median(), "Best/KLD std": kldiv_loss.get_std(),
                "Best/NSS mean": nss_loss.avg,"Best/NSS median": nss_loss.get_median(), "Best/NSS std": nss_loss.get_std(),
                "Best/SIM mean": sim_loss.avg,"Best/SIM median": sim_loss.get_median(), "Best/SIM std": sim_loss.get_std()})
            # Unwrap DataParallel so the checkpoint has un-prefixed keys.
            if torch.cuda.device_count() > 1:
                torch.save(model.module.state_dict(), args.model_val_path)
            else:
                torch.save(model.state_dict(), args.model_val_path)
    print()

    if args.lr_sched:
        scheduler.step()
|
utils.py
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import fnmatch
|
| 2 |
+
import os
|
| 3 |
+
import torch
|
| 4 |
+
import cv2
|
| 5 |
+
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
from scipy.spatial import distance
|
| 8 |
+
from math import pi, sqrt, exp
|
| 9 |
+
from os.path import join
|
| 10 |
+
from torchvision import utils
|
| 11 |
+
from PIL import Image
|
| 12 |
+
|
| 13 |
+
import numpy as np
|
| 14 |
+
import scipy.io as sio
|
| 15 |
+
import torch.nn as nn
|
| 16 |
+
import torch.nn.functional as F
|
| 17 |
+
import matplotlib.pyplot as plt
|
| 18 |
+
import matplotlib.animation as animation
|
| 19 |
+
|
| 20 |
+
# SALICON image dimensions in pixels.
W = 640
H = 480
# Viewing duration per image — presumably milliseconds (5 s), TODO confirm.
TIMESPAN = 5000
# Distance/weight constants for fixation-to-timestamp matching — units are
# pixels and a pixels-per-ms trade-off respectively; verify against usage.
MAX_PIXEL_DISTANCE = 800
ESTIMATED_TIMESTAMP_WEIGHT = 0.006
RATIO = 0.9

# Dataset locations; the *_VOL_ prefixes are completed with a slice count.
FIXATION_PATH = '../data/fixations/'
FIX_VOL_PATH = '../data/fixation_volumes_'
SAL_VOL_PATH = '../data/saliency_volumes_'
|
| 30 |
+
|
| 31 |
+
class bcolors:
    """ANSI escape sequences for colored/styled terminal output."""
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'      # reset all attributes
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'
|
| 41 |
+
|
| 42 |
+
def get_colored_value(value, ref_value, increasing=True):
    """Format `value` to 5 decimals, green when it beats `ref_value`.

    `increasing=True` means larger is better; False flips the comparison.
    """
    sign = 1 if increasing else -1
    is_worse = sign * ref_value > sign * value
    color = bcolors.FAIL if is_worse else bcolors.OKGREEN
    return color + '{:.5f}'.format(value) + bcolors.ENDC
|
| 45 |
+
|
| 46 |
+
def get_filenames(path):
    """Return the sorted entries of `path` whose names start with 'COCO_'."""
    entries = sorted(os.listdir(path))
    return [name for name in entries if fnmatch.fnmatch(name, 'COCO_*')]
|
| 48 |
+
|
| 49 |
+
def parse_fixations(filenames,
                    path_prefix,
                    etw=ESTIMATED_TIMESTAMP_WEIGHT, progress_bar=True):
    '''Load gaze recordings from .mat files and estimate a timestamp for
    every fixation.

    Parameters
    ----------
    filenames : iterable of str
        Base names (without the '.mat' extension) of the files to parse.
    path_prefix : str
        Directory/prefix prepended to every filename.
    etw : float
        Weight of the time difference relative to the pixel distance when
        matching a fixation to a raw gaze sample.
    progress_bar : bool
        Wrap the iteration in tqdm when True.

    Returns
    -------
    list
        One entry per file; each entry is a list (one per observer with at
        least one fixation) of [timestamp, [x, y]] pairs.
    '''
    fixation_volumes = []
    filenames = tqdm(filenames) if progress_bar else filenames

    for filename in filenames:
        # 1. Extracting data from .mat files
        mat = sio.loadmat(path_prefix + filename + '.mat')
        gaze = mat["gaze"]

        locations = []
        timestamps = []
        fixations = []

        # NOTE(review): assumes the MATLAB struct layout
        # gaze[i][0] == (locations, timestamps, fixations) per observer —
        # confirm against the dataset's .mat schema.
        for i in range(len(gaze)):
            locations.append(mat["gaze"][i][0][0])
            timestamps.append(mat["gaze"][i][0][1])
            fixations.append(mat["gaze"][i][0][2])

        # 2. Matching fixations with timestamps
        fixation_volume = []
        for i, observer in enumerate(fixations):
            fix_timestamps = []
            # Spread fixations evenly across TIMESPAN as the first guess
            # for each fixation's timestamp.
            fix_time = TIMESPAN / (len(observer) + 1)
            est_timestamp = fix_time

            for fixation in observer:
                # Pick the raw gaze sample minimizing a weighted mix of
                # pixel distance and time difference to the estimate.
                distances = distance.cdist([fixation], locations[i], 'euclidean')[0][..., np.newaxis]
                time_diffs = abs(timestamps[i] - est_timestamp)
                min_idx = (etw * time_diffs + distances).argmin()

                # Clamp the matched timestamp to the viewing duration.
                fix_timestamps.append([min(timestamps[i][min_idx][0], TIMESPAN), fixation.tolist()])
                est_timestamp += fix_time

            if (len(observer) > 0):
                fixation_volume.append(fix_timestamps)

        fixation_volumes.append(fixation_volume)

    return fixation_volumes
|
| 90 |
+
|
| 91 |
+
def get_saliency_volume(fixation_volume, conv2D, time_slices):
    '''Rasterize one image's fixations into a (time_slices, H, W) binary
    volume and blur it into a continuous saliency volume.

    Parameters
    ----------
    fixation_volume : iterable of (ts, coords)
        `ts` is used directly as an index into the first (time) dimension,
        so it must already be rescaled to [0, time_slices) — TODO confirm
        against the caller. `coords` holds 1-based (x, y) positions.
    conv2D : nn.Module
        Blur module applied to the stacked fixation maps.
    time_slices : int
        Temporal resolution of the volume.

    Returns
    -------
    torch.Tensor
        The blurred volume, peak-normalized to a maximum of 1.
        Requires CUDA (the map is allocated with torch.cuda.FloatTensor).
    '''
    fixation_map = torch.cuda.FloatTensor(time_slices,H,W).fill_(0)

    for ts, coords in fixation_volume:
        for (x, y) in coords:
            # Fixation coordinates are 1-based; the tensor is 0-based.
            fixation_map[ts,y-1,x-1] = 1

    saliency_volume = conv2D.forward(fixation_map)
    # Normalize so the volume's maximum value is exactly 1.
    return saliency_volume / saliency_volume.max()
|
| 100 |
+
|
| 101 |
+
def blur(img):
    '''Apply an 11x11 Gaussian blur to a numpy image and return the
    result as a torch.FloatTensor.'''
    kernel_size = (11, 11)
    blurred = cv2.GaussianBlur(img, kernel_size, 0)
    return torch.FloatTensor(blurred)
|
| 105 |
+
|
| 106 |
+
def visualize_model(model, loader, device, args):
    '''Run `model` over `loader` and save blurred prediction maps.

    Parameters
    ----------
    model : nn.Module
        Saliency model; may return either a map, or a tuple whose second
        element is the map.
    loader : iterable
        Yields (img, img_id, sz) items; sz is the original (width, height)
        of the source image.
    device : torch.device
        Device the input images are moved to before inference.
    args : namespace
        Must provide `results_dir`, where predictions are written.
    '''
    with torch.no_grad():
        model.eval()
        os.makedirs(args.results_dir, exist_ok=True)

        for (img, img_id, sz) in tqdm(loader):
            img = img.to(device)

            pred_map = model(img)
            # Some model variants return (aux, map); keep only the map.
            if isinstance(pred_map, tuple):
                pred_map = pred_map[1]
            pred_map = pred_map.cpu().squeeze(0).numpy()
            # Resize back to the source resolution (cv2 takes (w, h)).
            pred_map = cv2.resize(pred_map, (sz[0], sz[1]))

            # blur() already returns a FloatTensor; the original wrapped
            # it in torch.FloatTensor(...) a second time for no effect.
            pred_map = blur(pred_map)
            img_save(pred_map, join(args.results_dir, img_id[0]), normalize=True)
|
| 122 |
+
|
| 123 |
+
def img_save(tensor, fp, nrow=8, padding=2,
             normalize=False, range=None, scale_each=False, pad_value=0, format=None):
    '''Save a tensor image to `fp` as a single-channel PNG.

    The signature mirrors torchvision.utils.save_image; `range` and
    `format` intentionally shadow builtins to stay call-compatible with it.

    Bug fix: the previous version clobbered `fp` and always wrote the
    output to a hard-coded "1.png" (and printed a debug string).
    '''
    grid = utils.make_grid(tensor, nrow=nrow, padding=padding, pad_value=pad_value,
                           normalize=normalize, range=range, scale_each=scale_each)

    # Add 0.5 after unnormalizing to [0, 255] to round to nearest integer.
    ndarr = torch.round(grid.mul(255).add_(0.5).clamp_(0, 255).permute(1, 2, 0)).to('cpu', torch.uint8).numpy()
    # Saliency maps are single-channel; keep only the first channel.
    im = Image.fromarray(ndarr[:,:,0])
    # Always write a PNG, regardless of the extension the caller passed.
    fp = os.path.splitext(fp)[0] + '.png'
    im.save(fp, format=format, compress_level=0)
|
| 136 |
+
|
| 137 |
+
class AverageMeter(object):

    '''Computes and stores the current value, a weighted running average,
    and the full history of updates (for std/median statistics).'''

    def __init__(self):
        self.reset()

    def reset(self):
        '''Clear all accumulated statistics.'''
        self.past = np.array([])   # history of every value passed to update()
        self.val = 0               # most recent value
        self.avg = 0               # weighted running average
        self.sum = 0               # weighted sum of values
        self.count = 0             # total weight seen so far

    def update(self, val, n = 1):
        '''Record a new measurement.

        Parameters
        ----------
        val : float or torch.Tensor
            The new value. Tensors are moved to CPU before being appended
            to the history.
        n : int
            Weight of this measurement (e.g. the batch size).
        '''
        self.val = val
        self.sum += val*n
        self.count += n
        self.avg = self.sum / self.count
        # Bug fix: the original called val.cpu() unconditionally, which
        # raised AttributeError for plain Python numbers.
        self.past = np.append(self.past, val.cpu() if hasattr(val, 'cpu') else val)

    def get_std (self):
        '''Standard deviation over all recorded values.'''
        return np.std(self.past)

    def get_median (self):
        '''Median over all recorded values.'''
        return np.median(self.past)
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def im2heat(pred_dir, a, gt, exten='.png'):
    '''Overlay a predicted saliency map (read from disk) on a background
    image as a JET heatmap, alpha-blended by the prediction intensity.

    Parameters
    ----------
    pred_dir : str
        Directory holding the prediction images.
    a : str
        Base name of the prediction file (without extension).
    gt : ndarray
        Background image to blend with; presumably the same size as the
        prediction — TODO confirm at call sites.
    exten : str
        File extension of the prediction image.

    Returns
    -------
    np.uint8 ndarray
        The blended heatmap image.
    '''
    pred_nm = pred_dir + a + exten
    pred = cv2.imread(pred_nm, 0)  # grayscale prediction in [0, 255]
    heatmap_img = cv2.applyColorMap(pred, cv2.COLORMAP_JET)
    # NOTE(review): `convert` is not defined or imported anywhere in this
    # file's visible code — this line raises NameError unless `convert` is
    # provided elsewhere at runtime; confirm.
    heatmap_img = convert(heatmap_img)
    pred = np.stack((pred, pred, pred),2).astype('float32')
    pred = pred / 255.0

    # Per-pixel alpha blend: heatmap where the prediction is strong,
    # background where it is weak.
    return np.uint8(pred * heatmap_img + (1.0-pred) * gt)
|
| 174 |
+
|
| 175 |
+
def get_heat_image(image):
    '''Map a [0, 1] intensity image to an RGB "HOT" colormap image.'''
    scaled = np.uint8(255 * image)
    bgr = cv2.applyColorMap(scaled, cv2.COLORMAP_HOT)
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
|
| 177 |
+
|
| 178 |
+
def format_image(heatmap, image, max_value):
    '''Return (heatmap image, saturation/brightness-modulated image).

    The heatmap is normalized by `max_value`; the second output keeps the
    original image where the heatmap is strong and desaturates/darkens it
    where the heatmap is weak (brightness floor controlled by RATIO).
    '''
    norm_map = heatmap / max_value
    hsv_img = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    alpha = np.clip(2 * norm_map, 0, 1)
    hsv_img[:, :, 1] = np.uint8(alpha * hsv_img[:, :, 1])
    hsv_img[:, :, 2] = np.uint8((RATIO * alpha + (1 - RATIO)) * hsv_img[:, :, 2])

    heat = get_heat_image(norm_map[:, :, np.newaxis])
    return heat, cv2.cvtColor(hsv_img, cv2.COLOR_HSV2RGB)
|
| 186 |
+
|
| 187 |
+
def animate(gt_vol, pred_vol, image):
    '''Build a matplotlib animation comparing ground-truth and predicted
    saliency volumes frame by frame.

    Each frame stacks three rows: ground truth (heatmap | overlay),
    prediction (heatmap | overlay), and a signed difference map beside the
    original image.
    '''
    fig = plt.figure(figsize=(16, 16))

    gt_peak = np.max(gt_vol)
    pred_peak = np.max(pred_vol)
    frames = []

    for gt_map, pred_map in zip(gt_vol, pred_vol):
        gt_heat, gt_overlay = format_image(gt_map, image, gt_peak)
        gt_row = np.concatenate((gt_heat, gt_overlay), 1)

        pred_heat, pred_overlay = format_image(pred_map, image, pred_peak)
        pred_row = np.concatenate((pred_heat, pred_overlay), 1)

        # Signed difference remapped from [-1, 1] to [0, 1] for the
        # diverging TWILIGHT colormap.
        diff = 0.5 + ((gt_map / gt_peak) - (pred_map / pred_peak)) / 2
        diff_img = cv2.cvtColor(cv2.applyColorMap(np.uint8(255 * diff[:, :, np.newaxis]), cv2.COLORMAP_TWILIGHT), cv2.COLOR_BGR2RGB)
        diff_row = np.concatenate((diff_img, image), 1)

        frames.append([plt.imshow(np.concatenate((gt_row, pred_row, diff_row), 0), animated=True)])

    return animation.ArtistAnimation(fig, frames, interval=500, blit=True, repeat_delay=1000)
|
| 207 |
+
|
| 208 |
+
def animate_single_heatmap(gt_vol, image):
    '''Build a borderless matplotlib animation of a single saliency
    volume's heatmaps.'''
    fig = plt.figure(figsize=(6.4, 4.8), frameon=False)
    # Full-figure axes with all decorations removed.
    ax = plt.Axes(fig, [0., 0., 1., 1.])
    ax.set_axis_off()
    fig.add_axes(ax)

    peak = np.max(gt_vol)
    frames = []
    plt.axis('off')

    for gt_map in gt_vol:
        heat, _ = format_image(gt_map, image, peak)
        frames.append([ax.imshow(heat, animated=True)])

    return animation.ArtistAnimation(fig, frames, interval=1000, blit=True, repeat_delay=1000)
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def gauss(n, sigma):
    '''Return a discrete 1-D Gaussian kernel as a list of floats.

    The taps cover the symmetric range [-n//2, n//2], giving n samples
    for odd n (and n + 1 for even n, matching the original behavior).
    The kernel is not normalized to sum to 1.
    '''
    half = int(n / 2)
    norm = 1 / (sigma * sqrt(2 * pi))
    return [norm * exp(-float(x) ** 2 / (2 * sigma ** 2)) for x in range(-half, half + 1)]
|
| 226 |
+
|
| 227 |
+
class GaussianBlur1D(nn.Module):
    '''Temporal (1-D) Gaussian blur along the time axis of a volume.

    The kernel width scales with the number of time slices
    (sigma = 2 * time_slices / 25). Requires CUDA — the kernel is built
    with torch.cuda.FloatTensor.
    '''
    def __init__(self, time_slices):
        super(GaussianBlur1D, self).__init__()
        sigma = 2 * time_slices / 25
        # Standard ~4-sigma support, forced to an odd number of taps.
        self.size = 2 * int(4 * sigma + 0.5) + 1
        kernel = gauss(self.size, sigma)
        kernel = torch.cuda.FloatTensor(kernel)
        # Fixed (non-trainable) blur weights.
        self.weight = nn.Parameter(data=kernel, requires_grad=False)

    def forward(self, x):
        # NOTE(review): the kernel is reshaped to 5-D before F.conv1d, so
        # x is presumably a 5-D (N, C, T, H, W) volume; `padding=pad` pads
        # all trailing dims and the unwanted spatial padding is cropped
        # afterwards. F.conv1d ordinarily expects 3-D input — confirm this
        # works on the project's torch version.
        pad = int(self.size/2)
        temp = F.conv1d(x, self.weight.view(1, 1, -1, 1, 1), padding=pad)
        return temp[:,:,:,pad:-pad,pad:-pad]
|
| 240 |
+
|
| 241 |
+
class GaussianBlur2D(nn.Module):
    '''Spatial Gaussian blur applied as two separable 1-D passes
    (one per spatial axis) with fixed sigma 25 and 201 taps.

    Requires CUDA — the kernel is built with torch.cuda.FloatTensor.
    '''
    def __init__(self):
        super(GaussianBlur2D, self).__init__()
        self.size = 201
        kernel = gauss(self.size, 25)
        kernel = torch.cuda.FloatTensor(kernel)
        # Fixed (non-trainable) blur weights, shared by both passes.
        self.weight = nn.Parameter(data=kernel, requires_grad=False)

    def forward(self, x):
        # NOTE(review): x appears to be a 3-D (T, H, W) volume that is
        # expanded to 5-D via the two unsqueeze calls; padding introduced
        # on the non-convolved dims is cropped back out after each pass.
        # Confirm F.conv1d accepts 5-D tensors on the project's torch
        # version.
        pad = int(self.size/2)
        temp = F.conv1d(x.unsqueeze(0).unsqueeze(0), self.weight.view(1, 1, 1, -1, 1), padding=pad)
        temp = temp[:,:,pad:-pad,:,pad:-pad]
        temp = F.conv1d(temp, self.weight.view(1, 1, 1, 1, -1), padding=pad)
        return temp[:,:,pad:-pad,pad:-pad]
|