Commit 704a5f0 · Parent: 5a8e7c0
CPU fallback: avoid CUDA JIT by disabling fused ops when build unavailable; force CPU
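Both files change in the same way: the CUDA JIT build (torch.utils.cpp_extension.load) moves inside a try/except that runs once at import, the outcome is recorded in a module-level flag (_USE_FUSED / _USE_EXT), and every op branches on that flag. A minimal standalone sketch of the pattern, with generic names not taken from this repo:

    import torch

    _USE_EXT = False
    ext = None
    try:
        # torch.utils.cpp_extension.load JIT-compiles a C++/CUDA extension;
        # it raises on hosts without nvcc, a C++ compiler, or a writable
        # build cache (e.g. CPU-only HF Spaces), which the except absorbs.
        from torch.utils.cpp_extension import load
        ext = load("my_ext", sources=["my_ext.cpp", "my_ext_kernel.cu"], verbose=False)
        _USE_EXT = True
    except Exception:
        _USE_EXT = False  # pure-PyTorch composite ops take over

    def my_op(x):
        if _USE_EXT:
            return ext.my_op(x)  # fused kernel path
        return torch.relu(x)     # illustrative composite fallback

Catching broad Exception rather than ImportError alone matters here, because load can fail in several distinct ways: missing nvcc, missing headers, or a read-only extension cache.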
models/psp/stylegan2/op/fused_act.py (CHANGED)

@@ -3,15 +3,25 @@ import os
 import torch
 from torch import nn
 from torch.autograd import Function
-from torch.utils.cpp_extension import load
-module_path = os.path.dirname(__file__)
-fused = load(
-    "fused",
-    sources=[
-        os.path.join(module_path, "fused_bias_act.cpp"),
-        os.path.join(module_path, "fused_bias_act_kernel.cu"),
-    ],
-)
+
+_USE_FUSED = False
+fused = None
+try:
+    # Prefer fused CUDA extension when available
+    from torch.utils.cpp_extension import load
+    module_path = os.path.dirname(__file__)
+    fused = load(
+        "fused",
+        sources=[
+            os.path.join(module_path, "fused_bias_act.cpp"),
+            os.path.join(module_path, "fused_bias_act_kernel.cu"),
+        ],
+        verbose=False,
+    )
+    _USE_FUSED = True
+except Exception:
+    # CPU-only or build-restricted environments (e.g., HF Spaces) fallback
+    _USE_FUSED = False
 
 class FusedLeakyReLUFunctionBackward(Function):
     @staticmethod

@@ -22,9 +32,13 @@ class FusedLeakyReLUFunctionBackward(Function):
 
         empty = grad_output.new_empty(0)
 
-        grad_input = fused.fused_bias_act(
-            grad_output, empty, out, 3, 1, negative_slope, scale
-        )
+        if _USE_FUSED:
+            grad_input = fused.fused_bias_act(
+                grad_output, empty, out, 3, 1, negative_slope, scale
+            )
+        else:
+            # Fallback: autograd-friendly composite ops
+            grad_input = torch.nn.functional.leaky_relu(grad_output, negative_slope) * scale
 
         dim = [0]
 

@@ -38,9 +52,12 @@ class FusedLeakyReLUFunctionBackward(Function):
     @staticmethod
     def backward(ctx, gradgrad_input, gradgrad_bias):
         (out,) = ctx.saved_tensors
-        gradgrad_out = fused.fused_bias_act(
-            gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale
-        )
+        if _USE_FUSED:
+            gradgrad_out = fused.fused_bias_act(
+                gradgrad_input, gradgrad_bias, out, 3, 1, ctx.negative_slope, ctx.scale
+            )
+        else:
+            gradgrad_out = torch.nn.functional.leaky_relu(gradgrad_input, ctx.negative_slope) * ctx.scale
 
         return gradgrad_out, None, None, None
 

@@ -48,8 +65,13 @@ class FusedLeakyReLUFunctionBackward(Function):
 class FusedLeakyReLUFunction(Function):
     @staticmethod
     def forward(ctx, input, bias, negative_slope, scale):
-        empty = input.new_empty(0)
-        out = fused.fused_bias_act(input, bias, empty, 3, 0, negative_slope, scale)
+        if _USE_FUSED:
+            empty = input.new_empty(0)
+            out = fused.fused_bias_act(input, bias, empty, 3, 0, negative_slope, scale)
+        else:
+            if bias is not None:
+                input = input + bias.view(1, -1, *([1] * (input.dim() - 2)))
+            out = torch.nn.functional.leaky_relu(input, negative_slope) * scale
         ctx.save_for_backward(out)
         ctx.negative_slope = negative_slope
         ctx.scale = scale
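In the fused path, fused.fused_bias_act(input, bias, empty, 3, 0, negative_slope, scale) adds a per-channel bias, applies LeakyReLU, and rescales in one kernel; the new _USE_FUSED=False branch reproduces that forward math with composite ops. A standalone restatement, with an illustrative function name not taken from the repo:

    import torch
    import torch.nn.functional as F

    def fused_leaky_relu_ref(x, bias=None, negative_slope=0.2, scale=2 ** 0.5):
        # Same math as the composite forward branch in the diff above:
        # broadcast bias over all trailing dims, LeakyReLU, then rescale.
        if bias is not None:
            x = x + bias.view(1, -1, *([1] * (x.dim() - 2)))
        return F.leaky_relu(x, negative_slope) * scale

    x = torch.randn(2, 8, 4, 4)  # plain CPU tensor, no extension needed
    out = fused_leaky_relu_ref(x, torch.zeros(8))
    assert out.shape == x.shape

One caveat worth knowing: the composite backward applies LeakyReLU to grad_output itself, whereas the fused kernel selects the slope from the sign of the saved out, so the two backward paths coincide only where grad_output and out share a sign; for inference-only CPU use the backward is never invoked and the difference never triggers.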
models/psp/stylegan2/op/upfirdn2d.py (CHANGED)

@@ -1,17 +1,25 @@
 import os
 
 import torch
+import torch.nn.functional as F
 from torch.autograd import Function
-from torch.utils.cpp_extension import load
 
-module_path = os.path.dirname(__file__)
-upfirdn2d_op = load(
-    "upfirdn2d",
-    sources=[
-        os.path.join(module_path, "upfirdn2d.cpp"),
-        os.path.join(module_path, "upfirdn2d_kernel.cu"),
-    ],
-)
+_USE_EXT = False
+upfirdn2d_op = None
+try:
+    from torch.utils.cpp_extension import load
+    module_path = os.path.dirname(__file__)
+    upfirdn2d_op = load(
+        "upfirdn2d",
+        sources=[
+            os.path.join(module_path, "upfirdn2d.cpp"),
+            os.path.join(module_path, "upfirdn2d_kernel.cu"),
+        ],
+        verbose=False,
+    )
+    _USE_EXT = True
+except Exception:
+    _USE_EXT = False
 
 
 class UpFirDn2dBackward(Function):

@@ -25,18 +33,24 @@ class UpFirDn2dBackward(Function):
 
         grad_output = grad_output.reshape(-1, out_size[0], out_size[1], 1)
 
-        grad_input = upfirdn2d_op.upfirdn2d(
-            grad_output,
-            grad_kernel,
-            down_x,
-            down_y,
-            up_x,
-            up_y,
-            g_pad_x0,
-            g_pad_x1,
-            g_pad_y0,
-            g_pad_y1,
-        )
+        if _USE_EXT:
+            grad_input = upfirdn2d_op.upfirdn2d(
+                grad_output,
+                grad_kernel,
+                down_x,
+                down_y,
+                up_x,
+                up_y,
+                g_pad_x0,
+                g_pad_x1,
+                g_pad_y0,
+                g_pad_y1,
+            )
+        else:
+            # Fallback approximation using native ops
+            grad_input = upfirdn2d_native(
+                grad_output, grad_kernel, up_x, up_y, down_x, down_y, g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1
+            )
         grad_input = grad_input.view(in_size[0], in_size[1], in_size[2], in_size[3])
 
         ctx.save_for_backward(kernel)

@@ -62,18 +76,23 @@ class UpFirDn2dBackward(Function):
 
         gradgrad_input = gradgrad_input.reshape(-1, ctx.in_size[2], ctx.in_size[3], 1)
 
-        gradgrad_out = upfirdn2d_op.upfirdn2d(
-            gradgrad_input,
-            kernel,
-            ctx.up_x,
-            ctx.up_y,
-            ctx.down_x,
-            ctx.down_y,
-            ctx.pad_x0,
-            ctx.pad_x1,
-            ctx.pad_y0,
-            ctx.pad_y1,
-        )
+        if _USE_EXT:
+            gradgrad_out = upfirdn2d_op.upfirdn2d(
+                gradgrad_input,
+                kernel,
+                ctx.up_x,
+                ctx.up_y,
+                ctx.down_x,
+                ctx.down_y,
+                ctx.pad_x0,
+                ctx.pad_x1,
+                ctx.pad_y0,
+                ctx.pad_y1,
+            )
+        else:
+            gradgrad_out = upfirdn2d_native(
+                gradgrad_input, kernel, ctx.up_x, ctx.up_y, ctx.down_x, ctx.down_y, ctx.pad_x0, ctx.pad_x1, ctx.pad_y0, ctx.pad_y1
+            )
         # gradgrad_out = gradgrad_out.view(ctx.in_size[0], ctx.out_size[0], ctx.out_size[1], ctx.in_size[3])
         gradgrad_out = gradgrad_out.view(
             ctx.in_size[0], ctx.in_size[1], ctx.out_size[0], ctx.out_size[1]

@@ -112,9 +131,12 @@ class UpFirDn2d(Function):
 
         ctx.g_pad = (g_pad_x0, g_pad_x1, g_pad_y0, g_pad_y1)
 
-        out = upfirdn2d_op.upfirdn2d(
-            input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
-        )
+        if _USE_EXT:
+            out = upfirdn2d_op.upfirdn2d(
+                input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
+            )
+        else:
+            out = upfirdn2d_native(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1)
         # out = out.view(major, out_h, out_w, minor)
         out = out.view(-1, channel, out_h, out_w)
 
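The _USE_EXT=False branches delegate to upfirdn2d_native, assumed here to be the pure-PyTorch reference already defined in this file, as in the rosinality StyleGAN2 port. Conceptually, upfirdn2d is zero-stuffed upsampling, FIR filtering, then strided downsampling; a simplified sketch with symmetric padding only (the real helper also handles asymmetric and negative pad_x0/pad_x1/pad_y0/pad_y1):

    import torch
    import torch.nn.functional as F

    def upfirdn2d_sketch(x, kernel, up=1, down=1, pad=(0, 0)):
        # x: (N, C, H, W); kernel: 2-D FIR filter shared by all channels.
        n, c, h, w = x.shape
        out = x.new_zeros(n, c, h * up, w * up)
        out[:, :, ::up, ::up] = x                           # 1) upsample by zero insertion
        out = F.pad(out, [pad[0], pad[1], pad[0], pad[1]])  # 2) pad W, then H
        kh, kw = kernel.shape
        weight = torch.flip(kernel, [0, 1]).view(1, 1, kh, kw).repeat(c, 1, 1, 1)
        out = F.conv2d(out, weight, groups=c)               # 3) true convolution per channel
        return out[:, :, ::down, ::down]                    # 4) downsample by striding

    # e.g. 2x up, 3x3 blur, 2x down preserves the spatial size:
    x = torch.randn(1, 3, 8, 8)
    k = torch.ones(3, 3) / 9.0
    assert upfirdn2d_sketch(x, k, up=2, down=2, pad=(1, 1)).shape == x.shape

Because the fallback is built from F.pad, F.conv2d, and slicing, it is autograd-friendly, which is why the backward methods above can call it directly instead of the handwritten gradient kernel.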