Spaces:
Running
Running
from collections import OrderedDict

import torch
import torch.nn as nn
from torch.utils import model_zoo

from .features.alexnet import RGBalexnet
from .modules import FeatureExtractor, Finalizer, DeepGazeII as TorchDeepGazeII
class DeepGazeI(TorchDeepGazeII):
    """DeepGaze I model.

    Note that this is not exactly the model from the original paper: the
    original used a caffe AlexNet (with the two-stream architecture) plus a
    theano linear readout, trained with the SFO optimizer and a sparsity
    penalty. Here the stock torch AlexNet implementation (no adaptations)
    is wrapped in the DeepGaze II torch framework with a simple linear
    readout, retrained with Adam on the same data (all images of MIT1003
    of size 1024x768) and without the sparsity penalty.

    Reference:
    Kümmerer, M., Theis, L., & Bethge, M. (2015). Deep Gaze I: Boosting
    Saliency Prediction with Feature Maps Trained on ImageNet. ICLR
    Workshop Track. http://arxiv.org/abs/1411.1045

    :param pretrained: if True (default), download and load the retrained
        weights from the DeepGaze release on GitHub.
    """

    def __init__(self, pretrained=True):
        # Backbone: stock torch AlexNet; extract the activations of
        # layer '1.features.10' (256 channels).
        backbone = FeatureExtractor(RGBalexnet(), ['1.features.10'])

        # Readout: a single bias-free 1x1 convolution mapping the 256
        # feature channels to one saliency channel (plain linear readout).
        linear_readout = nn.Sequential(OrderedDict([
            ('conv0', nn.Conv2d(256, 1, (1, 1), bias=False)),
        ]))

        super().__init__(
            features=backbone,
            readout_network=linear_readout,
            downsample=2,
            readout_factor=4,
            saliency_map_factor=4,
        )

        if pretrained:
            # load_url caches the checkpoint locally after the first download;
            # map to CPU so loading works on machines without a GPU.
            self.load_state_dict(model_zoo.load_url('https://github.com/matthias-k/DeepGaze/releases/download/v1.01/deepgaze1.pth', map_location=torch.device('cpu')))