| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| |
|
| | """Custom PyTorch ops for efficient bias and activation."""
|
| |
|
| | import os
|
| | import warnings
|
| | import numpy as np
|
| | import torch
|
| | import dnnlib
|
| | import traceback
|
| |
|
| | from .. import custom_ops
|
| | from .. import misc
|
| |
|
| |
|
| |
|
# Table of supported activation functions, keyed by the name accepted by `bias_act(act=...)`.
#
# Fields of each entry, as they are consumed elsewhere in this file:
#   func          Callable used by the reference implementation; invoked as `func(x, alpha=alpha)`.
#   def_alpha     Value used for `alpha` when the caller passes `alpha=None`.
#   def_gain      Value used for `gain` when the caller passes `gain=None`.
#   cuda_idx      Integer handed to the compiled plugin to identify the activation.
#   ref           Which tensors the CUDA backward pass keeps: contains 'x' and/or 'y', or is empty.
#   has_2nd_grad  Whether the CUDA path evaluates a second-order gradient term for this activation.
activation_funcs = {
    'linear': dnnlib.EasyDict(
        func=lambda x, **_: x,
        def_alpha=0,
        def_gain=1,
        cuda_idx=1,
        ref='',
        has_2nd_grad=False,
    ),
    'relu': dnnlib.EasyDict(
        func=lambda x, **_: torch.nn.functional.relu(x),
        def_alpha=0,
        def_gain=np.sqrt(2),
        cuda_idx=2,
        ref='y',
        has_2nd_grad=False,
    ),
    'lrelu': dnnlib.EasyDict(
        func=lambda x, alpha, **_: torch.nn.functional.leaky_relu(x, alpha),
        def_alpha=0.2,
        def_gain=np.sqrt(2),
        cuda_idx=3,
        ref='y',
        has_2nd_grad=False,
    ),
    'tanh': dnnlib.EasyDict(
        func=lambda x, **_: torch.tanh(x),
        def_alpha=0,
        def_gain=1,
        cuda_idx=4,
        ref='y',
        has_2nd_grad=True,
    ),
    'sigmoid': dnnlib.EasyDict(
        func=lambda x, **_: torch.sigmoid(x),
        def_alpha=0,
        def_gain=1,
        cuda_idx=5,
        ref='y',
        has_2nd_grad=True,
    ),
    'elu': dnnlib.EasyDict(
        func=lambda x, **_: torch.nn.functional.elu(x),
        def_alpha=0,
        def_gain=1,
        cuda_idx=6,
        ref='y',
        has_2nd_grad=True,
    ),
    'selu': dnnlib.EasyDict(
        func=lambda x, **_: torch.nn.functional.selu(x),
        def_alpha=0,
        def_gain=1,
        cuda_idx=7,
        ref='y',
        has_2nd_grad=True,
    ),
    'softplus': dnnlib.EasyDict(
        func=lambda x, **_: torch.nn.functional.softplus(x),
        def_alpha=0,
        def_gain=1,
        cuda_idx=8,
        ref='y',
        has_2nd_grad=True,
    ),
    'swish': dnnlib.EasyDict(
        func=lambda x, **_: torch.sigmoid(x) * x,
        def_alpha=0,
        def_gain=np.sqrt(2),
        cuda_idx=9,
        ref='x',
        has_2nd_grad=True,
    ),
}
|
| |
|
| |
|
| |
|
# Module-level state for the lazily built CUDA plugin.
_inited = False                  # True once `_init()` has attempted the build (successful or not).
_plugin = None                   # Object returned by `custom_ops.get_plugin()`, or None if unavailable.
_null_tensor = torch.empty([0])  # Zero-element placeholder passed wherever a tensor argument is unused.


def _init():
    """Build/load the CUDA plugin on first use and report whether it is available.

    The build is attempted at most once per process: `_inited` is set before the
    attempt, so a failed build is not retried on later calls. On failure a warning
    containing the traceback is emitted and `_plugin` stays `None`, which makes
    `bias_act()` fall back to the reference implementation.

    Returns:
        `True` if the plugin was loaded successfully, `False` otherwise.
    """
    global _inited, _plugin
    if not _inited:
        _inited = True
        module_dir = os.path.dirname(__file__)
        sources = [os.path.join(module_dir, name) for name in ('bias_act.cpp', 'bias_act.cu')]
        try:
            _plugin = custom_ops.get_plugin('bias_act_plugin', sources=sources, extra_cuda_cflags=['--use_fast_math'])
        except Exception:
            # Deliberately best-effort: any build/load error degrades to the slow path.
            # `Exception` (rather than a bare `except`) lets KeyboardInterrupt and
            # SystemExit propagate, so interrupting a long compile is not mistaken
            # for a build failure.
            warnings.warn('Failed to build CUDA kernels for bias_act. Falling back to slow reference implementation. Details:\n\n' + traceback.format_exc())
    return _plugin is not None
|
| |
|
| |
|
| |
|
def bias_act(x, b=None, dim=1, act='linear', alpha=None, gain=None, clamp=None, impl='cuda'):
    r"""Apply bias, activation, gain and clamping to a tensor as a single fused op.

    The steps are performed in this order, and each of them is optional:
    add the bias `b` to `x`, evaluate the activation function `act`, multiply
    by `gain`, and clamp the result. The fused op is usually considerably more
    efficient than the equivalent sequence of standard PyTorch ops. First and
    second order gradients are supported; third order gradients are not.

    Args:
        x:      Input activation tensor of any shape.
        b:      Bias vector, or `None` for no bias. Must be a 1D tensor of the
                same type as `x`, with a known shape whose length matches the
                size of `x` along `dim`.
        dim:    Dimension of `x` that the elements of `b` correspond to.
                Ignored when `b` is not given.
        act:    Name of the activation function, or `"linear"` for none, e.g.
                `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`. The full
                list is given by `activation_funcs`. `None` is not allowed.
        alpha:  Shape parameter of the activation function, or `None` to use
                the default for `act`.
        gain:   Scaling factor applied to the output, or `None` to use the
                default for `act` (see `activation_funcs`). If unsure,
                consider specifying 1.
        clamp:  Clamp the output to `[-clamp, +clamp]`, or `None` (default)
                for no clamping.
        impl:   Implementation to use: `"cuda"` (default) or `"ref"`.

    Returns:
        Tensor of the same shape and datatype as `x`.
    """
    assert isinstance(x, torch.Tensor)
    assert impl in ['ref', 'cuda']

    # The fused kernel is only attempted for CUDA tensors; `_init()` is evaluated
    # last so that the plugin build is never triggered for other inputs.
    cuda_requested = impl == 'cuda' and x.device.type == 'cuda'
    if cuda_requested and _init():
        fused_op = _bias_act_cuda(dim=dim, act=act, alpha=alpha, gain=gain, clamp=clamp)
        return fused_op.apply(x, b)

    # Otherwise (explicit 'ref', non-CUDA tensor, or plugin unavailable) use plain PyTorch ops.
    return _bias_act_ref(x=x, b=b, dim=dim, act=act, alpha=alpha, gain=gain, clamp=clamp)
|
| |
|
| |
|
| |
|
@misc.profiled_function
def _bias_act_ref(x, b=None, dim=1, act='linear', alpha=None, gain=None, clamp=None):
    """Slow reference implementation of `bias_act()` using standard PyTorch ops.

    Takes the same `x`, `b`, `dim`, `act`, `alpha`, `gain` and `clamp` arguments
    as `bias_act()` and returns the resulting tensor.
    """
    assert isinstance(x, torch.Tensor)
    assert clamp is None or clamp >= 0

    # Resolve defaults from the activation table; a negative limit means "no clamping".
    spec = activation_funcs[act]
    slope = float(spec.def_alpha if alpha is None else alpha)
    scale = float(spec.def_gain if gain is None else gain)
    limit = float(-1 if clamp is None else clamp)

    # Add bias, reshaped so that it broadcasts along `dim` only.
    if b is not None:
        assert isinstance(b, torch.Tensor) and b.ndim == 1
        assert 0 <= dim < x.ndim
        assert b.shape[0] == x.shape[dim]
        bias_shape = [-1 if axis == dim else 1 for axis in range(x.ndim)]
        x = x + b.reshape(bias_shape)

    # Evaluate activation function.
    x = spec.func(x, alpha=slope)

    # Scale by gain, skipping the multiply when it would be a no-op.
    if scale != 1:
        x = x * scale

    # Clamp.
    if limit >= 0:
        x = x.clamp(-limit, limit)
    return x
|
| |
|
| |
|
| |
|
# Cache of generated autograd function classes, keyed by (dim, act, alpha, gain, clamp).
_bias_act_cuda_cache = {}


def _bias_act_memory_format(tensor):
    """Return the memory format in which the fused op should keep `tensor`."""
    # `torch.channels_last` is only defined for rank-4 tensors; requesting it for any
    # other rank makes `Tensor.contiguous()` raise. Restricting it to rank 4 lets
    # e.g. an [N, C, 1] input (whose stride along dim 1 is also 1) take the regular
    # contiguous path instead of failing.
    if tensor.ndim == 4 and tensor.stride()[1] == 1:
        return torch.channels_last
    return torch.contiguous_format


def _bias_act_cuda(dim=1, act='linear', alpha=None, gain=None, clamp=None):
    """Fast CUDA implementation of `bias_act()` using custom ops.

    Args:
        dim:    Dimension of the input that the bias elements correspond to.
        act:    Key into `activation_funcs`.
        alpha:  Activation shape parameter, or `None` for the default of `act`.
        gain:   Output scaling factor, or `None` for the default of `act`.
        clamp:  Non-negative clamping limit, or `None` for no clamping.

    Returns:
        A `torch.autograd.Function` subclass specialized for the given arguments;
        call it as `cls.apply(x, b)`. Classes are cached, so repeated calls with
        the same resolved arguments return the same class object.
    """
    # Parse arguments. A negative `clamp` is the internal encoding of "no clamping".
    assert clamp is None or clamp >= 0
    spec = activation_funcs[act]
    alpha = float(alpha if alpha is not None else spec.def_alpha)
    gain = float(gain if gain is not None else spec.def_gain)
    clamp = float(clamp if clamp is not None else -1)

    # Lookup from cache.
    key = (dim, act, alpha, gain, clamp)
    if key in _bias_act_cuda_cache:
        return _bias_act_cuda_cache[key]

    # NOTE(review): the sixth positional argument of `_plugin.bias_act` (0 / 1 / 2 below)
    # presumably selects the derivative order, and the three tensors before it the
    # reference x, reference y and incoming gradient -- confirm against bias_act.cpp.

    # Forward op.
    class BiasActCuda(torch.autograd.Function):
        @staticmethod
        def forward(ctx, x, b):
            ctx.memory_format = _bias_act_memory_format(x)
            x = x.contiguous(memory_format=ctx.memory_format)
            b = b.contiguous() if b is not None else _null_tensor
            y = x
            # Skip the kernel launch entirely when the op would be an identity.
            if act != 'linear' or gain != 1 or clamp >= 0 or b is not _null_tensor:
                y = _plugin.bias_act(x, b, _null_tensor, _null_tensor, _null_tensor, 0, dim, spec.cuda_idx, alpha, gain, clamp)
            # Keep only the tensors that the backward pass of this activation reads.
            ctx.save_for_backward(
                x if 'x' in spec.ref or spec.has_2nd_grad else _null_tensor,
                b if 'x' in spec.ref or spec.has_2nd_grad else _null_tensor,
                y if 'y' in spec.ref else _null_tensor)
            return y

        @staticmethod
        def backward(ctx, dy):
            dy = dy.contiguous(memory_format=ctx.memory_format)
            x, b, y = ctx.saved_tensors
            dx = None
            db = None

            # `dx` is also needed as the source of `db`, hence the combined condition.
            if ctx.needs_input_grad[0] or ctx.needs_input_grad[1]:
                dx = dy
                if act != 'linear' or gain != 1 or clamp >= 0:
                    dx = BiasActCudaGrad.apply(dy, x, b, y)

            # Bias gradient: reduce over every dimension except `dim`.
            if ctx.needs_input_grad[1]:
                db = dx.sum([i for i in range(dx.ndim) if i != dim])

            return dx, db

    # Backward op, itself differentiable so that second order gradients work.
    class BiasActCudaGrad(torch.autograd.Function):
        @staticmethod
        def forward(ctx, dy, x, b, y):
            ctx.memory_format = _bias_act_memory_format(dy)
            dx = _plugin.bias_act(dy, b, x, y, _null_tensor, 1, dim, spec.cuda_idx, alpha, gain, clamp)
            ctx.save_for_backward(
                dy if spec.has_2nd_grad else _null_tensor,
                x, b, y)
            return dx

        @staticmethod
        def backward(ctx, d_dx):
            d_dx = d_dx.contiguous(memory_format=ctx.memory_format)
            dy, x, b, y = ctx.saved_tensors
            d_dy = None
            d_x = None
            d_b = None
            d_y = None  # No gradient is ever produced for the saved output `y`.

            if ctx.needs_input_grad[0]:
                d_dy = BiasActCudaGrad.apply(d_dx, x, b, y)

            if spec.has_2nd_grad and (ctx.needs_input_grad[1] or ctx.needs_input_grad[2]):
                d_x = _plugin.bias_act(d_dx, b, x, y, dy, 2, dim, spec.cuda_idx, alpha, gain, clamp)

            if spec.has_2nd_grad and ctx.needs_input_grad[2]:
                d_b = d_x.sum([i for i in range(d_x.ndim) if i != dim])

            return d_dy, d_x, d_b, d_y

    # Add to cache.
    _bias_act_cuda_cache[key] = BiasActCuda
    return BiasActCuda
|
| |
|
| |
|
| |
|