Spaces:
Build error
Build error
| import os | |
| import logging | |
| import time | |
| from collections import namedtuple | |
| from pathlib import Path | |
| import torch | |
| import torch.optim as optim | |
| import torch.nn as nn | |
| import numpy as np | |
| from torch.utils.data import DataLoader | |
| from prefetch_generator import BackgroundGenerator | |
| from contextlib import contextmanager | |
| import re | |
def clean_str(s):
    """Return ``s`` with every special character replaced by an underscore.

    The characters treated as special are exactly those listed in the
    character class below.
    """
    special_chars = "[|@#!¡·$€%&()=?¿^*;:,¨´><+]"
    return re.sub(special_chars, "_", s)
def create_logger(cfg, cfg_path, phase='train', rank=-1):
    """Create the run's log directory and configure the root logger.

    Only processes whose ``rank`` is -1 or 0 set up logging; every other
    rank gets ``(None, None, None)`` and nothing is created.

    Args:
        cfg: Config object; ``cfg.DATASET.DATASET``, ``cfg.MODEL.NAME`` and
            ``cfg.LOG_DIR`` are read.
        cfg_path: Path of the config file. Its base name up to the first
            ``.`` is used in the directory and log-file names.
        phase: Suffix of the log-file name.
        rank: Process rank.

    Returns:
        ``(logger, final_output_dir, tensorboard_log_dir)`` where the two
        directories are the same path given as strings, or
        ``(None, None, None)`` for ranks other than -1 and 0.
    """
    # set up logger dir
    dataset = cfg.DATASET.DATASET.replace(':', '_')
    model = cfg.MODEL.NAME
    cfg_path = os.path.basename(cfg_path).split('.')[0]

    if rank not in [-1, 0]:
        return None, None, None

    time_str = time.strftime('%Y-%m-%d-%H-%M')
    log_file = '{}_{}_{}.log'.format(cfg_path, time_str, phase)

    # set up tensorboard_log_dir
    tensorboard_log_dir = (
        Path(cfg.LOG_DIR) / dataset / model / (cfg_path + '_' + time_str)
    )
    final_output_dir = tensorboard_log_dir
    if not tensorboard_log_dir.exists():
        print('=> creating {}'.format(tensorboard_log_dir))
    # exist_ok avoids FileExistsError if the directory appears between the
    # exists() check above and this call.
    tensorboard_log_dir.mkdir(parents=True, exist_ok=True)

    final_log_file = tensorboard_log_dir / log_file
    head = '%(asctime)-15s %(message)s'
    logging.basicConfig(filename=str(final_log_file), format=head)
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    # Also echo log records to the console.
    console = logging.StreamHandler()
    logging.getLogger('').addHandler(console)

    return logger, str(final_output_dir), str(tensorboard_log_dir)
def select_device(logger=None, device='', batch_size=None):
    """Pick the torch device to run on and optionally log what was chosen.

    Args:
        logger: Optional logger; when truthy, device information is written
            with ``logger.info``.
        device: ``'cpu'``, or a CUDA device string such as ``'0'`` or
            ``'0,1,2,3'``. A non-CPU value is exported as
            ``CUDA_VISIBLE_DEVICES``.
        batch_size: When given and more than one GPU is visible, it must be
            a multiple of the GPU count.

    Returns:
        ``torch.device('cuda:0')`` when CUDA is used, else
        ``torch.device('cpu')``.

    Raises:
        AssertionError: If a CUDA device was requested but CUDA is
            unavailable, or ``batch_size`` is not a multiple of the GPU
            count.
    """
    wants_cpu = device.lower() == 'cpu'
    if device and not wants_cpu:  # a device other than 'cpu' was requested
        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
        assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device  # check availablity

    use_cuda = (not wants_cpu) and torch.cuda.is_available()
    if use_cuda:
        bytes_per_mb = 1024 ** 2  # bytes to MB
        gpu_count = torch.cuda.device_count()
        if gpu_count > 1 and batch_size:  # batch_size must split evenly across GPUs
            assert batch_size % gpu_count == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, gpu_count)
        props = [torch.cuda.get_device_properties(idx) for idx in range(gpu_count)]
        prefix = f'Using torch {torch.__version__} '
        for idx, prop in enumerate(props):
            if idx == 1:
                # From the second GPU on, replace the prefix with padding of equal width.
                prefix = ' ' * len(prefix)
            if logger:
                logger.info("%sCUDA:%g (%s, %dMB)" % (prefix, idx, prop.name, prop.total_memory / bytes_per_mb))
    elif logger:
        logger.info(f'Using torch {torch.__version__} CPU')

    if logger:
        logger.info('')  # skip a line
    return torch.device('cuda:0' if use_cuda else 'cpu')
def get_optimizer(cfg, model):
    """Build the optimizer named by ``cfg.TRAIN.OPTIMIZER``.

    Only parameters with ``requires_grad`` set are handed to the optimizer.

    Args:
        cfg: Config object; ``cfg.TRAIN.OPTIMIZER``, ``LR0`` and ``MOMENTUM``
            are read, plus ``WD`` and ``NESTEROV`` for SGD.
        model: Module whose parameters are optimized.

    Returns:
        An ``optim.SGD`` for ``'sgd'``, an ``optim.Adam`` for ``'adam'``
        (with ``MOMENTUM`` used as the first beta), or ``None`` for any
        other optimizer name.
    """
    train_cfg = cfg.TRAIN
    choice = train_cfg.OPTIMIZER

    if choice == 'sgd':
        trainable = (p for p in model.parameters() if p.requires_grad)
        return optim.SGD(
            trainable,
            lr=train_cfg.LR0,
            momentum=train_cfg.MOMENTUM,
            weight_decay=train_cfg.WD,
            nesterov=train_cfg.NESTEROV,
        )

    if choice == 'adam':
        trainable = (p for p in model.parameters() if p.requires_grad)
        return optim.Adam(
            trainable,
            lr=train_cfg.LR0,
            betas=(train_cfg.MOMENTUM, 0.999),
        )

    return None
def save_checkpoint(epoch, name, model, optimizer, output_dir, filename, is_best=False):
    """Write a training checkpoint to ``output_dir/filename``.

    The checkpoint is a dict with the keys ``'epoch'``, ``'model'`` (the
    given ``name``), ``'state_dict'`` and ``'optimizer'``. For a
    DataParallel / DistributedDataParallel model the wrapped module's state
    dict is stored instead of the wrapper's.

    Args:
        epoch: Value stored under ``'epoch'``.
        name: Value stored under ``'model'``.
        model: Module whose state dict is saved.
        optimizer: Optimizer whose state dict is saved.
        output_dir: Directory the files are written to.
        filename: File name of the full checkpoint.
        is_best: When true, the model state dict alone is additionally
            written to ``output_dir/model_best.pth``.
    """
    model_state = model.module.state_dict() if is_parallel(model) else model.state_dict()
    checkpoint = {
        'epoch': epoch,
        'model': name,
        'state_dict': model_state,
        'optimizer': optimizer.state_dict(),
    }
    torch.save(checkpoint, os.path.join(output_dir, filename))

    if is_best and 'state_dict' in checkpoint:
        # The checkpoint has no 'best_state_dict' entry; looking that key up
        # raised KeyError. Save the entry the guard actually checks for.
        torch.save(checkpoint['state_dict'],
                   os.path.join(output_dir, 'model_best.pth'))
def initialize_weights(model):
    """Adjust BatchNorm and activation settings on every submodule in place.

    Modules are matched by exact type (subclasses are not matched):
    ``nn.Conv2d`` is left untouched, ``nn.BatchNorm2d`` gets ``eps=1e-3``
    and ``momentum=0.03``, and the listed activation modules are switched
    to ``inplace=True``.
    """
    for module in model.modules():
        kind = type(module)
        if kind is nn.Conv2d:
            # Convolutions are left as they are.
            continue
        if kind is nn.BatchNorm2d:
            module.eps = 1e-3
            module.momentum = 0.03
        elif kind in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
            module.inplace = True
def xyxy2xywh(x):
    """Convert nx4 boxes from ``[x1, y1, x2, y2]`` to ``[x, y, w, h]``.

    ``(x1, y1)`` is the top-left corner and ``(x2, y2)`` the bottom-right
    corner; the result holds the box centre followed by width and height.
    The input is not modified: a torch tensor is cloned, anything else is
    copied with ``np.copy``.
    """
    boxes = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    left, top = x[:, 0], x[:, 1]
    right, bottom = x[:, 2], x[:, 3]
    boxes[:, 0] = (left + right) / 2  # x center
    boxes[:, 1] = (top + bottom) / 2  # y center
    boxes[:, 2] = right - left  # width
    boxes[:, 3] = bottom - top  # height
    return boxes
def is_parallel(model):
    """Return True when ``model`` is exactly a DataParallel or DistributedDataParallel wrapper."""
    model_type = type(model)
    return (
        model_type is nn.parallel.DataParallel
        or model_type is nn.parallel.DistributedDataParallel
    )
def time_synchronized():
    """Return ``time.time()`` after synchronizing CUDA, when CUDA is available."""
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()
class DataLoaderX(DataLoader):
    """DataLoader whose iterator is wrapped in a ``BackgroundGenerator`` for prefetching."""

    def __iter__(self):
        base_iterator = super().__iter__()
        return BackgroundGenerator(base_iterator)
@contextmanager
def torch_distributed_zero_first(local_rank: int):
    """Context manager that lets the local master run the body first.

    Processes whose ``local_rank`` is neither -1 nor 0 wait at a
    ``torch.distributed.barrier()`` before entering the ``with`` body. The
    process with ``local_rank == 0`` runs the body and then calls the
    barrier itself. With ``local_rank == -1`` no barrier is used at all.

    Without the ``@contextmanager`` decorator this is a bare generator
    function, and using it in a ``with`` statement fails.
    """
    if local_rank not in [-1, 0]:
        torch.distributed.barrier()
    yield
    if local_rank == 0:
        torch.distributed.barrier()