|
|
import torch.nn as nn |
|
|
from torch.nn import functional as F |
|
|
from ndlinear import NdLinear |
|
|
from .utils_resnet import resnet18, resnet34, resnet50, resnet101, resnet152, resnet34nd |
|
|
from .ndlinear_util import ReshapedNdLinear |
|
|
import torch |
|
|
from safetensors.torch import save_file as safe_save, load_file as safe_load |
|
|
import json |
|
|
import os |
|
|
|
|
|
class PretrainedTripletModel(nn.Module):
    """Base class adding directory-based save/load helpers to triplet models.

    Subclasses are expected to set ``self.embedding_dimension`` and
    ``self.pretrained`` in ``__init__``; ``save_pretrained`` reads both
    attributes when it writes ``config.json``.
    """

    def save_pretrained(self, save_directory, safe_serialization=True):
        """Write ``config.json`` and the model weights into ``save_directory``.

        Args:
            save_directory: Target directory; created if it does not exist.
            safe_serialization: If True, weights go to ``model.safetensors``;
                otherwise they are written to ``pytorch_model.bin`` with
                ``torch.save``.
        """
        os.makedirs(save_directory, exist_ok=True)

        config = {
            "model_class": self.__class__.__name__,
            "embedding_dimension": self.embedding_dimension,
            "pretrained": self.pretrained,
        }
        config_path = os.path.join(save_directory, "config.json")
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config, f, indent=2)

        state_dict = self.state_dict()
        if safe_serialization:
            safe_save(state_dict, os.path.join(save_directory, "model.safetensors"))
        else:
            torch.save(state_dict, os.path.join(save_directory, "pytorch_model.bin"))

    @classmethod
    def from_pretrained(cls, load_directory, safe_serialization=True):
        """Rebuild a model from a directory written by ``save_pretrained``.

        Args:
            load_directory: Directory holding ``config.json`` and the weights.
            safe_serialization: If True, read ``model.safetensors``; otherwise
                read ``pytorch_model.bin`` with ``torch.load``.

        Returns:
            An instance of ``cls`` with the stored weights loaded.
        """
        import inspect

        config_path = os.path.join(load_directory, "config.json")
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)

        # Only forward config entries that are real constructor parameters.
        # (``__code__.co_varnames`` also lists local variables of ``__init__``,
        # so a local sharing a name with a config key would be passed as an
        # unexpected keyword argument.)
        accepted = {
            name
            for name, param in inspect.signature(cls.__init__).parameters.items()
            if name != "self"
            and param.kind in (param.POSITIONAL_OR_KEYWORD, param.KEYWORD_ONLY)
        }
        init_kwargs = {key: value for key, value in config.items() if key in accepted}

        # The checkpoint loaded below replaces every parameter and buffer
        # (``load_state_dict`` is strict by default), so any pre-trained
        # initialisation would be discarded. Build the model without it and
        # restore the recorded flag on the instance afterwards.
        restore_pretrained = "pretrained" in init_kwargs
        saved_pretrained = init_kwargs.get("pretrained")
        if restore_pretrained:
            init_kwargs["pretrained"] = False
        model = cls(**init_kwargs)
        if restore_pretrained and hasattr(model, "pretrained"):
            model.pretrained = saved_pretrained

        if safe_serialization:
            state_dict = safe_load(os.path.join(load_directory, "model.safetensors"))
        else:
            # NOTE(review): ``torch.load`` unpickles the file; only use this
            # path with checkpoints from a trusted source.
            state_dict = torch.load(
                os.path.join(load_directory, "pytorch_model.bin"),
                map_location="cpu",
            )
        model.load_state_dict(state_dict)
        return model
|
|
|
|
|
class Resnet18Triplet(nn.Module):
    """ResNet-18 backbone producing L2-normalized embeddings for triplet-loss training.

    Args:
        embedding_dimension (int): Output size of the final fully connected
            layer, i.e. the length of the embedding vector. Defaults to 512.
        pretrained (bool): Forwarded to ``resnet18``; True requests a model
            pre-trained on ImageNet. Defaults to False.
    """

    def __init__(self, embedding_dimension=512, pretrained=False):
        super().__init__()
        backbone = resnet18(pretrained=pretrained)
        # Replace the final layer with a bias-free projection to the embedding size.
        backbone.fc = nn.Linear(backbone.fc.in_features, embedding_dimension, bias=False)
        self.model = backbone

    def forward(self, images):
        """Run the backbone on ``images`` and L2-normalize the result along dim 1."""
        return F.normalize(self.model(images), p=2, dim=1)
|
|
|
|
|
|
|
|
class Resnet34Triplet(nn.Module):
    """ResNet-34 backbone producing L2-normalized embeddings for triplet-loss training.

    Args:
        embedding_dimension (int): Output size of the final fully connected
            layer, i.e. the length of the embedding vector. Defaults to 512.
        pretrained (bool): Forwarded to ``resnet34``; True requests a model
            pre-trained on ImageNet. Defaults to False.
    """

    def __init__(self, embedding_dimension=512, pretrained=False):
        super().__init__()
        backbone = resnet34(pretrained=pretrained)
        # Replace the final layer with a bias-free projection to the embedding size.
        backbone.fc = nn.Linear(backbone.fc.in_features, embedding_dimension, bias=False)
        self.model = backbone

    def forward(self, images):
        """Run the backbone on ``images`` and L2-normalize the result along dim 1."""
        return F.normalize(self.model(images), p=2, dim=1)
|
|
|
|
|
|
|
|
class Resnet50Triplet(nn.Module):
    """ResNet-50 backbone producing L2-normalized embeddings for triplet-loss training.

    Args:
        embedding_dimension (int): Output size of the final fully connected
            layer, i.e. the length of the embedding vector. Defaults to 512.
        pretrained (bool): Forwarded to ``resnet50``; True requests a model
            pre-trained on ImageNet. Defaults to False.
    """

    def __init__(self, embedding_dimension=512, pretrained=False):
        super().__init__()
        backbone = resnet50(pretrained=pretrained)
        # Replace the final layer with a bias-free projection to the embedding size.
        backbone.fc = nn.Linear(backbone.fc.in_features, embedding_dimension, bias=False)
        self.model = backbone

    def forward(self, images):
        """Run the backbone on ``images`` and L2-normalize the result along dim 1."""
        return F.normalize(self.model(images), p=2, dim=1)
|
|
|
|
|
|
|
|
class Resnet101Triplet(nn.Module):
    """ResNet-101 backbone producing L2-normalized embeddings for triplet-loss training.

    Args:
        embedding_dimension (int): Output size of the final fully connected
            layer, i.e. the length of the embedding vector. Defaults to 512.
        pretrained (bool): Forwarded to ``resnet101``; True requests a model
            pre-trained on ImageNet. Defaults to False.
    """

    def __init__(self, embedding_dimension=512, pretrained=False):
        super().__init__()
        backbone = resnet101(pretrained=pretrained)
        # Replace the final layer with a bias-free projection to the embedding size.
        backbone.fc = nn.Linear(backbone.fc.in_features, embedding_dimension, bias=False)
        self.model = backbone

    def forward(self, images):
        """Run the backbone on ``images`` and L2-normalize the result along dim 1."""
        return F.normalize(self.model(images), p=2, dim=1)
|
|
|
|
|
|
|
|
class Resnet152Triplet(nn.Module):
    """ResNet-152 backbone producing L2-normalized embeddings for triplet-loss training.

    Args:
        embedding_dimension (int): Output size of the final fully connected
            layer, i.e. the length of the embedding vector. Defaults to 512.
        pretrained (bool): Forwarded to ``resnet152``; True requests a model
            pre-trained on ImageNet. Defaults to False.
    """

    def __init__(self, embedding_dimension=512, pretrained=False):
        super().__init__()
        backbone = resnet152(pretrained=pretrained)
        # Replace the final layer with a bias-free projection to the embedding size.
        backbone.fc = nn.Linear(backbone.fc.in_features, embedding_dimension, bias=False)
        self.model = backbone

    def forward(self, images):
        """Run the backbone on ``images`` and L2-normalize the result along dim 1."""
        return F.normalize(self.model(images), p=2, dim=1)
|
|
|
|
|
|
|
|
class Resnet34NdTriplet(nn.Module):
    """ResNet-34 backbone with an NdLinear head, producing L2-normalized embeddings.

    The backbone's final fully connected layer is replaced by an ``NdLinear``
    wrapped in ``ReshapedNdLinear``.

    Args:
        embedding_dimension (int): Used as the first output dimension of the
            ``NdLinear`` head (the second is 1). Defaults to 512.
        pretrained (bool): Forwarded to ``resnet34``; True requests a model
            pre-trained on ImageNet. Defaults to False.
    """

    def __init__(self, embedding_dimension=512, pretrained=False):
        super().__init__()
        # NOTE(review): the file imports ``resnet34nd`` but this class builds the
        # plain ``resnet34`` backbone -- confirm that is intended.
        backbone = resnet34(pretrained=pretrained)
        in_features = backbone.fc.in_features
        nd_head = NdLinear((in_features, 1), (embedding_dimension, 1), bias=False)
        backbone.fc = ReshapedNdLinear(nd_head)
        self.model = backbone

    def forward(self, images):
        """Run the backbone on ``images`` and L2-normalize the result along dim 1."""
        return F.normalize(self.model(images), p=2, dim=1)
|
|
|
|
|
|
|
|
class Resnet50NdTriplet(PretrainedTripletModel):
    """ResNet-50 backbone with an NdLinear head, producing L2-normalized embeddings.

    The backbone's final fully connected layer is replaced by an ``NdLinear``
    wrapped in ``ReshapedNdLinear``. The constructor arguments are kept on the
    instance as ``embedding_dimension`` and ``pretrained``.

    Args:
        embedding_dimension (int): The ``NdLinear`` head is built with output
            dimensions ``(embedding_dimension // 16, 16)``. Defaults to 512.
        pretrained (bool): Forwarded to ``resnet50``. Defaults to False.
    """

    def __init__(self, embedding_dimension=512, pretrained=False):
        super().__init__()
        self.embedding_dimension = embedding_dimension
        self.pretrained = pretrained

        backbone = resnet50(pretrained=pretrained)
        in_features = backbone.fc.in_features
        # NOTE(review): integer division drops any remainder, so this presumably
        # expects embedding_dimension to be a multiple of 16 -- confirm.
        nd_head = NdLinear(
            (in_features, 1),
            (embedding_dimension // 16, 16),
            bias=False,
        )
        backbone.fc = ReshapedNdLinear(nd_head)
        self.model = backbone

    def forward(self, images):
        """Run the backbone on ``images`` and L2-normalize the result along dim 1."""
        return F.normalize(self.model(images), p=2, dim=1)