| | import torch |
| | import torch.nn as nn |
| | from torch.nn import init |
| | import functools |
| | from torch.optim import lr_scheduler |
| | from util.util import to_device, load_network |
| |
|
| | |
| | |
| | |
| |
|
| |
|
def init_weights(net, init_type='normal', init_gain=0.02):
    """Initialize network weights in place, or load them from a saved model.

    Parameters:
        net (network)     -- network to be initialized
        init_type (str)   -- the name of an initialization method:
                             normal (alias N02) | xavier (alias glorot) |
                             kaiming | orthogonal (alias ortho).
                             Any other value is not treated as a method name:
                             it is passed to load_network(net, init_type, 'latest')
                             (presumably a checkpoint location -- confirm
                             against util.util.load_network).
        init_gain (float) -- scaling factor for normal, xavier and orthogonal.

    Returns:
        The initialized network, or whatever load_network returns when
        init_type is not an initialization method name.

    We use 'normal' in the original pix2pix and CycleGAN paper. But xavier and
    kaiming might work better for some applications. Feel free to try yourself.
    """
    # Accept both the short names used by this code base and the names that
    # the docstring (and the default argument) advertise.
    normal_names = ('N02', 'normal')
    xavier_names = ('glorot', 'xavier')
    kaiming_names = ('kaiming',)
    ortho_names = ('ortho', 'orthogonal')
    known_methods = normal_names + xavier_names + kaiming_names + ortho_names

    def init_func(m):
        # Only layers with a plain weight matrix/kernel are re-initialized;
        # every other module (and all biases) keeps its existing values.
        if not isinstance(m, (nn.Conv2d, nn.Linear, nn.Embedding)):
            return

        if init_type in normal_names:
            init.normal_(m.weight.data, 0.0, init_gain)
        elif init_type in xavier_names:
            init.xavier_normal_(m.weight.data, gain=init_gain)
        elif init_type in kaiming_names:
            init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
        elif init_type in ortho_names:
            init.orthogonal_(m.weight.data, gain=init_gain)
        else:
            raise NotImplementedError('initialization method [%s] is not implemented' % init_type)

    if init_type in known_methods:
        print('initialize network with %s' % init_type)
        net.apply(init_func)
    else:
        print('loading the model from %s' % init_type)
        net = load_network(net, init_type, 'latest')
    return net
| |
|
def init_net(net, init_type='normal', init_gain=0.02, gpu_ids=[]):
    """Initialize a network: 1. register CPU/GPU device (with multi-GPU support); 2. initialize the network weights

    Parameters:
        net (network)      -- the network to be initialized
        init_type (str)    -- forwarded unchanged to init_weights, which decides
                              how the weights are initialized
        init_gain (float)  -- scaling factor, forwarded to init_weights
        gpu_ids (int list) -- which GPUs the network runs on: e.g., 0,1,2.
                              An empty list (the default) or None leaves the
                              network on its current device.

    Return the network produced by init_weights. When gpu_ids is non-empty
    the network has first been moved to gpu_ids[0] and wrapped in
    torch.nn.DataParallel.
    """
    # The default list is only read, never mutated, so sharing it is harmless.
    if gpu_ids is not None and len(gpu_ids) > 0:
        assert torch.cuda.is_available(), 'gpu_ids were given but CUDA is not available'
        net.to(gpu_ids[0])
        net = torch.nn.DataParallel(net, gpu_ids)
    # init_weights may rebind the network (its loading branch returns the
    # result of load_network), so hand back its return value instead of the
    # local reference.
    return init_weights(net, init_type, init_gain=init_gain)
| |
|
| |
|
def get_scheduler(optimizer, opt):
    """Return a learning rate scheduler

    Parameters:
        optimizer          -- the optimizer of the network
        opt (option class) -- stores all the experiment flags; needs to be a subclass of BaseOptions.
                              opt.lr_policy is the name of learning rate policy: linear | step | plateau | cosine

    For 'linear', we keep the same learning rate for the first <opt.niter> epochs
    and linearly decay the rate to zero over the next <opt.niter_decay> epochs.
    For other schedulers (step, plateau, and cosine), we use the default PyTorch schedulers.
    See https://pytorch.org/docs/stable/optim.html for more details.

    Attributes read from opt per policy:
        linear  -- epoch_count, niter, niter_decay
        step    -- lr_decay_iters
        cosine  -- niter

    Raises:
        NotImplementedError -- if opt.lr_policy is not one of the names above.
    """
    policy = opt.lr_policy
    if policy == 'linear':
        def lambda_rule(epoch):
            # Multiplier stays at 1.0 until epoch + epoch_count exceeds niter,
            # then falls linearly with slope 1 / (niter_decay + 1).
            lr_l = 1.0 - max(0, epoch + opt.epoch_count - opt.niter) / float(opt.niter_decay + 1)
            return lr_l
        scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    elif policy == 'step':
        scheduler = lr_scheduler.StepLR(optimizer, step_size=opt.lr_decay_iters, gamma=0.1)
    elif policy == 'plateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.2, threshold=0.01, patience=5)
    elif policy == 'cosine':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=opt.niter, eta_min=0)
    else:
        # Raise (not return) so a misspelled policy fails here rather than
        # later, when the caller tries to step the "scheduler".
        raise NotImplementedError('learning rate policy [%s] is not implemented' % policy)
    return scheduler
| |
|
| |
|