| | """ Model creation / weight loading / state_dict helpers |
| | |
| | Hacked together by / Copyright 2020 Ross Wightman |
| | """ |
| | import logging |
| | import os |
| | import math |
| | from collections import OrderedDict |
| | from copy import deepcopy |
| | from typing import Callable |
| |
|
| | import torch |
| | import torch.nn as nn |
| | import torch.utils.model_zoo as model_zoo |
| |
|
| | _logger = logging.getLogger(__name__) |
| |
|
| |
|
def load_state_dict(checkpoint_path, use_ema=False):
    """Read a model state dict from a checkpoint file.

    The file is loaded onto the CPU. If the loaded object is a dict containing
    a ``'state_dict'`` entry (or ``'state_dict_ema'`` when ``use_ema`` is set
    and that entry exists), that entry is returned with a leading ``'module.'``
    removed from every key. Otherwise the loaded object itself is returned.

    Args:
        checkpoint_path: Path of the checkpoint file.
        use_ema: Prefer the ``'state_dict_ema'`` entry when the checkpoint has one.

    Returns:
        An ``OrderedDict`` of parameters, or the raw loaded object when the
        checkpoint has no state-dict entry.

    Raises:
        FileNotFoundError: ``checkpoint_path`` is empty or is not an existing file.
    """
    if not (checkpoint_path and os.path.isfile(checkpoint_path)):
        message = "No checkpoint found at '{}'".format(checkpoint_path)
        _logger.error(message)
        raise FileNotFoundError(message)

    # NOTE: torch.load unpickles the file; only load checkpoints from trusted sources.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    is_mapping = isinstance(checkpoint, dict)

    state_dict_key = 'state_dict'
    if is_mapping and use_ema and 'state_dict_ema' in checkpoint:
        state_dict_key = 'state_dict_ema'

    if is_mapping and state_dict_key in checkpoint:
        # Strip exactly the 'module.' prefix. Matching on the bare word 'module'
        # would also truncate unrelated keys such as 'modules.0.weight'.
        prefix = 'module.'
        state_dict = OrderedDict(
            (key[len(prefix):] if key.startswith(prefix) else key, value)
            for key, value in checkpoint[state_dict_key].items())
    else:
        state_dict = checkpoint

    _logger.info("Loaded {} from checkpoint '{}'".format(state_dict_key, checkpoint_path))
    return state_dict
| |
|
| |
|
def load_checkpoint(model, checkpoint_path, use_ema=False, strict=True):
    """Load weights from a checkpoint file into ``model``.

    Args:
        model: Object exposing ``load_state_dict(state_dict, strict=...)``.
        checkpoint_path: Path of the checkpoint file, forwarded to ``load_state_dict``.
        use_ema: Forwarded to ``load_state_dict``.
        strict: Forwarded to ``model.load_state_dict``.

    Returns:
        Whatever ``model.load_state_dict`` returns, so callers using
        ``strict=False`` can inspect the outcome instead of it being discarded.
    """
    state_dict = load_state_dict(checkpoint_path, use_ema)
    return model.load_state_dict(state_dict, strict=strict)
| |
|
| |
|
def resume_checkpoint(model, checkpoint_path, optimizer=None, loss_scaler=None, log_info=True):
    """Restore model (and optionally optimizer / loss-scaler) state from a checkpoint.

    Args:
        model: Object exposing ``load_state_dict``.
        checkpoint_path: Path of the checkpoint file.
        optimizer: If given and the checkpoint has an ``'optimizer'`` entry,
            that entry is loaded into it.
        loss_scaler: If given and the checkpoint has an entry under
            ``loss_scaler.state_dict_key``, that entry is loaded into it.
        log_info: Emit info-level progress messages.

    Returns:
        The epoch to resume from, or ``None`` when the checkpoint stores no
        ``'epoch'`` entry (or is a bare state dict).

    Raises:
        FileNotFoundError: ``checkpoint_path`` is empty or is not an existing file.
    """
    if not (checkpoint_path and os.path.isfile(checkpoint_path)):
        message = "No checkpoint found at '{}'".format(checkpoint_path)
        _logger.error(message)
        raise FileNotFoundError(message)

    # NOTE: torch.load unpickles the file; only load checkpoints from trusted sources.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')

    if not (isinstance(checkpoint, dict) and 'state_dict' in checkpoint):
        # No wrapper dict: treat the loaded object as the state dict itself.
        model.load_state_dict(checkpoint)
        if log_info:
            _logger.info("Loaded checkpoint '{}'".format(checkpoint_path))
        return None

    if log_info:
        _logger.info('Restoring model state from checkpoint...')
    # Strip exactly the 'module.' prefix. Matching on the bare word 'module'
    # would also truncate unrelated keys such as 'modules.0.weight'.
    prefix = 'module.'
    cleaned_state = OrderedDict(
        (key[len(prefix):] if key.startswith(prefix) else key, value)
        for key, value in checkpoint['state_dict'].items())
    model.load_state_dict(cleaned_state)

    if optimizer is not None and 'optimizer' in checkpoint:
        if log_info:
            _logger.info('Restoring optimizer state from checkpoint...')
        optimizer.load_state_dict(checkpoint['optimizer'])

    if loss_scaler is not None and loss_scaler.state_dict_key in checkpoint:
        if log_info:
            _logger.info('Restoring AMP loss scaler state from checkpoint...')
        loss_scaler.load_state_dict(checkpoint[loss_scaler.state_dict_key])

    resume_epoch = None
    if 'epoch' in checkpoint:
        resume_epoch = checkpoint['epoch']
        # Checkpoints with version > 1 resume at the epoch after the stored one.
        if 'version' in checkpoint and checkpoint['version'] > 1:
            resume_epoch += 1

    if log_info:
        # .get(): a checkpoint without an 'epoch' entry must not fail while logging.
        _logger.info("Loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint.get('epoch')))
    return resume_epoch
| |
|
| |
|
def _collapse_conv_to_single_channel(conv_weight):
    """Sum a 4-D conv weight over its input-channel dim (dim 1) for 1-channel input."""
    orig_dtype = conv_weight.dtype
    # Compute in float32, then cast back to the checkpoint's original dtype.
    weight = conv_weight.float()
    out_ch, in_ch, k_h, k_w = weight.shape
    if in_ch > 3:
        assert in_ch % 3 == 0, 'first conv input channels must be a multiple of 3'
        # Sum each consecutive group of three input channels into one.
        weight = weight.reshape(out_ch, in_ch // 3, 3, k_h, k_w).sum(dim=2, keepdim=False)
    else:
        weight = weight.sum(dim=1, keepdim=True)
    return weight.to(orig_dtype)


def _repeat_conv_input_channels(conv_weight, in_chans):
    """Tile a 3-input-channel conv weight along dim 1 to ``in_chans`` channels, scaled by 3 / in_chans."""
    orig_dtype = conv_weight.dtype
    weight = conv_weight.float()
    repeat = int(math.ceil(in_chans / 3))
    weight = weight.repeat(1, repeat, 1, 1)[:, :in_chans, :, :]
    # NOTE(review): the 3 / in_chans factor presumably keeps the summed response
    # comparable to the 3-channel original — confirm.
    weight = weight * (3 / float(in_chans))
    return weight.to(orig_dtype)


def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=None, strict=True):
    """Fetch the weights at ``cfg['url']`` and load them into ``model``.

    The downloaded state dict is adjusted before loading:

    * ``in_chans == 1``: the first conv weight is summed over input channels.
    * other ``in_chans != 3``: a 3-input-channel first conv weight is repeated
      and rescaled to ``in_chans`` channels; a first conv with any other input
      width is dropped from the state dict and loading becomes non-strict.
    * classifier: when ``num_classes == 1000`` and ``cfg['num_classes'] == 1001``
      the first row of the classifier weight/bias is dropped; for any other
      class-count mismatch the classifier entries are removed and loading
      becomes non-strict.

    Args:
        model: Object exposing ``load_state_dict``; its ``default_cfg`` attribute
            is used when ``cfg`` is None.
        cfg: Dict read for ``'url'``, ``'classifier'``, ``'num_classes'`` and,
            when ``in_chans != 3``, ``'first_conv'``.
        num_classes: Number of classes of ``model``'s classifier.
        in_chans: Number of input channels of ``model``.
        filter_fn: Optional callable applied to the downloaded state dict.
        strict: Passed to ``model.load_state_dict`` unless entries were dropped.

    Returns:
        None. When no usable URL is configured a warning is logged and the
        model is left untouched.
    """
    if cfg is None:
        # Default of None: a model without `default_cfg` should take the
        # "no URL" path below rather than raise AttributeError.
        cfg = getattr(model, 'default_cfg', None)
    if cfg is None or 'url' not in cfg or not cfg['url']:
        _logger.warning("Pretrained model URL is invalid, using random initialization.")
        return

    state_dict = model_zoo.load_url(cfg['url'], progress=False, map_location='cpu')

    if filter_fn is not None:
        state_dict = filter_fn(state_dict)

    if in_chans == 1:
        conv1_name = cfg['first_conv']
        conv1_key = conv1_name + '.weight'
        _logger.info('Converting first conv (%s) pretrained weights from 3 to 1 channel' % conv1_name)
        state_dict[conv1_key] = _collapse_conv_to_single_channel(state_dict[conv1_key])
    elif in_chans != 3:
        conv1_name = cfg['first_conv']
        conv1_key = conv1_name + '.weight'
        _, pretrained_in_chans, _, _ = state_dict[conv1_key].shape
        if pretrained_in_chans != 3:
            _logger.warning('Deleting first conv (%s) from pretrained weights.' % conv1_name)
            del state_dict[conv1_key]
            strict = False
        else:
            _logger.info('Repeating first conv (%s) weights in channel dim.' % conv1_name)
            state_dict[conv1_key] = _repeat_conv_input_channels(state_dict[conv1_key], in_chans)

    classifier_name = cfg['classifier']
    weight_key = classifier_name + '.weight'
    bias_key = classifier_name + '.bias'
    if num_classes == 1000 and cfg['num_classes'] == 1001:
        # Drop the first class row.
        # NOTE(review): presumably an extra background class at index 0 — confirm.
        state_dict[weight_key] = state_dict[weight_key][1:]
        if bias_key in state_dict:
            state_dict[bias_key] = state_dict[bias_key][1:]
    elif num_classes != cfg['num_classes']:
        # pop(..., None): a classifier without a bias must not raise KeyError here.
        state_dict.pop(weight_key, None)
        state_dict.pop(bias_key, None)
        strict = False

    model.load_state_dict(state_dict, strict=strict)
| |
|
| |
|
def extract_layer(model, layer):
    """Walk a dotted path (e.g. ``'blocks.0.conv'``) from ``model`` and return what it reaches.

    A leading ``'module'`` component is reconciled with whether ``model`` has a
    ``module`` attribute: the walk starts from ``model.module`` when the path
    omits it, and the component is dropped when ``model`` has no such attribute.
    Numeric components index into the current object; other components are
    attribute lookups. The walk stops at the first component the current
    object does not have and returns the object reached so far.
    """
    path = layer.split('.')
    has_wrapper = hasattr(model, 'module')

    current = model
    if has_wrapper and path[0] != 'module':
        current = model.module
    if not has_wrapper and path[0] == 'module':
        path = path[1:]

    for name in path:
        if not hasattr(current, name):
            return current
        current = current[int(name)] if name.isdigit() else getattr(current, name)
    return current
| |
|
| |
|
def set_layer(model, layer, val):
    """Assign ``val`` at the dotted path ``layer`` inside ``model``.

    The walk starts from ``model.module`` when ``model`` has a ``module``
    attribute and the path does not begin with ``'module'``. A first pass
    counts how many path components resolve; the last resolved component is
    then the attribute name set on its parent.
    """
    parts = layer.split('.')
    root = model.module if hasattr(model, 'module') and parts[0] != 'module' else model

    # Pass 1: count resolvable components. Unresolvable ones are skipped, not fatal.
    resolved = 0
    probe = root
    for part in parts:
        if not hasattr(probe, part):
            continue
        probe = probe[int(part)] if part.isdigit() else getattr(probe, part)
        resolved += 1
    target_index = resolved - 1

    # Pass 2: descend to the parent of the target and replace the attribute.
    parent = root
    for part in parts[:target_index]:
        parent = parent[int(part)] if part.isdigit() else getattr(parent, part)
    setattr(parent, parts[target_index], val)
| |
|
| |
|
def adapt_model_from_string(parent_module, model_string):
    """Build a copy of ``parent_module`` whose layers are resized per ``model_string``.

    ``model_string`` is a ``'***'``-separated list of ``key:[d0, d1, ...]``
    entries mapping parameter names to integer shapes; entries with an empty
    shape are ignored. Every Conv2d / Conv2dSame, BatchNorm2d and Linear found
    in ``parent_module`` is replaced in the copy by a freshly constructed layer
    sized from the ``<name>.weight`` entry. Both the copy and ``parent_module``
    are put in eval mode; the copy is returned.

    NOTE(review): ``Conv2dSame`` is not imported in the visible part of this
    file — confirm it is in scope.
    """
    separator = '***'
    shapes = {}
    for entry in model_string.split(separator):
        fields = entry.split(':')
        param_name = fields[0]
        dims = fields[1][1:-1].split(',')  # drop the surrounding brackets
        if dims[0] != '':
            shapes[param_name] = [int(dim) for dim in dims]

    new_module = deepcopy(parent_module)
    for name, _ in parent_module.named_modules():
        old_layer = extract_layer(parent_module, name)

        if isinstance(old_layer, (nn.Conv2d, Conv2dSame)):
            conv_cls = Conv2dSame if isinstance(old_layer, Conv2dSame) else nn.Conv2d
            weight_shape = shapes[name + '.weight']
            out_channels, in_channels = weight_shape[0], weight_shape[1]
            groups = 1
            if old_layer.groups > 1:
                # Grouped convs are rebuilt with groups == in_channels == out_channels.
                in_channels = out_channels
                groups = in_channels
            set_layer(new_module, name, conv_cls(
                in_channels=in_channels, out_channels=out_channels, kernel_size=old_layer.kernel_size,
                bias=old_layer.bias is not None, padding=old_layer.padding, dilation=old_layer.dilation,
                groups=groups, stride=old_layer.stride))

        if isinstance(old_layer, nn.BatchNorm2d):
            set_layer(new_module, name, nn.BatchNorm2d(
                num_features=shapes[name + '.weight'][0], eps=old_layer.eps, momentum=old_layer.momentum,
                affine=old_layer.affine, track_running_stats=True))

        if isinstance(old_layer, nn.Linear):
            num_features = shapes[name + '.weight'][1]
            set_layer(new_module, name, nn.Linear(
                in_features=num_features, out_features=old_layer.out_features, bias=old_layer.bias is not None))
            if hasattr(new_module, 'num_features'):
                new_module.num_features = num_features

    new_module.eval()
    parent_module.eval()

    return new_module
| |
|
| |
|
def adapt_model_from_file(parent_module, model_variant):
    """Adapt ``parent_module`` using the spec in ``pruned/<model_variant>.txt`` next to this file.

    The file's stripped contents are handed to ``adapt_model_from_string``,
    whose result is returned.
    """
    pruned_dir = os.path.join(os.path.dirname(__file__), 'pruned')
    spec_path = os.path.join(pruned_dir, model_variant + '.txt')
    with open(spec_path, 'r') as spec_file:
        model_string = spec_file.read().strip()
    return adapt_model_from_string(parent_module, model_string)
| |
|
| |
|
def build_model_with_cfg(
        model_cls: Callable,
        variant: str,
        pretrained: bool,
        default_cfg: dict,
        model_cfg: dict = None,
        feature_cfg: dict = None,
        pretrained_strict: bool = True,
        pretrained_filter_fn: Callable = None,
        **kwargs):
    """Instantiate a model, then optionally prune it, load weights and wrap it for features.

    Args:
        model_cls: Callable that builds the model from ``**kwargs`` (plus
            ``cfg=model_cfg`` when ``model_cfg`` is given).
        variant: Variant name; used to locate the pruning spec when ``pruned`` is set.
        pretrained: Load pretrained weights via ``load_pretrained``.
        default_cfg: Stored on the model as ``default_cfg`` (deep-copied).
        model_cfg: Optional config passed to ``model_cls`` as ``cfg``.
        feature_cfg: Optional keyword arguments for the feature wrapper. May
            contain ``'feature_cls'`` (a callable, or a string containing
            ``'hook'``). The caller's dict is not modified.
        pretrained_strict: ``strict`` flag forwarded to ``load_pretrained``.
        pretrained_filter_fn: ``filter_fn`` forwarded to ``load_pretrained``.
        **kwargs: Forwarded to ``model_cls`` after ``pruned``, ``features_only``
            and (when ``features_only`` is set) ``out_indices`` are removed.

    Returns:
        The constructed model, wrapped by the feature class when
        ``features_only`` was requested.

    Raises:
        AssertionError: ``feature_cfg['feature_cls']`` is an unrecognised string.

    NOTE(review): ``FeatureListNet`` / ``FeatureHookNet`` are not imported in
    the visible part of this file — confirm they are in scope.
    """
    pruned = kwargs.pop('pruned', False)
    features = False
    # Work on a copy: the setdefault/pop calls below must not leak into a dict
    # the caller may reuse for later calls.
    feature_cfg = dict(feature_cfg) if feature_cfg else {}

    if kwargs.pop('features_only', False):
        features = True
        feature_cfg.setdefault('out_indices', (0, 1, 2, 3, 4))
        if 'out_indices' in kwargs:
            feature_cfg['out_indices'] = kwargs.pop('out_indices')

    model = model_cls(**kwargs) if model_cfg is None else model_cls(cfg=model_cfg, **kwargs)
    model.default_cfg = deepcopy(default_cfg)

    if pruned:
        model = adapt_model_from_file(model, variant)

    if pretrained:
        # NOTE(review): when the caller omits `num_classes`, 0 is passed here, which
        # load_pretrained treats as a class-count mismatch unless
        # cfg['num_classes'] is also 0 — confirm this is intended.
        load_pretrained(
            model,
            num_classes=kwargs.get('num_classes', 0),
            in_chans=kwargs.get('in_chans', 3),
            filter_fn=pretrained_filter_fn, strict=pretrained_strict)

    if features:
        if 'feature_cls' in feature_cfg:
            feature_cls = feature_cfg.pop('feature_cls')
            if isinstance(feature_cls, str):
                feature_cls = feature_cls.lower()
                if 'hook' in feature_cls:
                    feature_cls = FeatureHookNet
                else:
                    # Explicit raise instead of `assert False` so the check
                    # survives `python -O`; the exception type is unchanged.
                    raise AssertionError(f'Unknown feature class {feature_cls}')
        else:
            feature_cls = FeatureListNet
        model = feature_cls(model, **feature_cfg)

    return model