# Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
import numpy as np

import warp as wp
@wp.kernel
def gd_step(arr_x: wp.array(dtype=float), arr_dfdx: wp.array(dtype=float), alpha: float):
    # one gradient-descent step per element: x <- x - alpha * df/dx
    tid = wp.tid()

    x = arr_x[tid]
    dfdx = arr_dfdx[tid]

    x = x - dfdx * alpha

    arr_x[tid] = x
@wp.kernel
def nesterov1(beta: float, x: wp.array(dtype=float), x_prev: wp.array(dtype=float), y: wp.array(dtype=float)):
    # Nesterov lookahead point: y <- x + beta * (x - x_prev)
    tid = wp.tid()

    y[tid] = x[tid] + beta * (x[tid] - x_prev[tid])
@wp.kernel
def nesterov2(
    alpha: float,
    beta: wp.array(dtype=float),
    eta: wp.array(dtype=float),
    x: wp.array(dtype=float),
    x_prev: wp.array(dtype=float),
    y: wp.array(dtype=float),
    dfdx: wp.array(dtype=float),
):
    # adaptive restart (not yet implemented) would look like:
    #
    # if eta > 0.0:
    #     # adaptive restart
    #     x_prev = x
    #     b = 0
    # else:
    #     # nesterov update
    #     x_prev = x
    #     x = y - alpha * dfdx
    tid = wp.tid()

    # gradient step from the lookahead point: x <- y - alpha * df/dx
    x_prev[tid] = x[tid]
    x[tid] = y[tid] - alpha * dfdx[tid]
def inner(a, b, out):
    # dot product of two device arrays; the scalar result is written to the
    # single-element array 'out'
    from warp.utils import array_inner

    array_inner(a, b, out)
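
# Usage sketch for inner() (illustrative; assumes a and b are float arrays of
# equal length on the same device):
#
#   out = wp.zeros(1, dtype=float, device=a.device)
#   inner(a, b, out)        # out now holds dot(a, b)
#   print(out.numpy()[0])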
class Optimizer:
    def __init__(self, n, mode, device):
        self.n = n
        self.mode = mode
        self.device = device

        # allocate space for residual buffers
        self.dfdx = wp.zeros(n, dtype=float, device=device)

        if mode == "nesterov":
            self.x_prev = wp.zeros(n, dtype=float, device=device)
            self.y = wp.zeros(n, dtype=float, device=device)

            # reserved for the adaptive-restart variant; unused by solve() for now
            self.eta = wp.zeros(1, dtype=float, device=device)
            self.eta_prev = wp.zeros(1, dtype=float, device=device)
            self.beta = wp.zeros(1, dtype=int, device=device)
    def solve(self, x, grad_func, max_iters=20, alpha=0.01, report=False):
        if report:
            # reset stats
            stats = {"evals": 0, "residual": []}

        if self.mode == "gd":
            for _ in range(max_iters):
                # compute residual
                grad_func(x, self.dfdx)

                # gradient step
                wp.launch(kernel=gd_step, dim=self.n, inputs=[x, self.dfdx, alpha], device=self.device)

                if report:
                    stats["evals"] += 1
                    r = np.linalg.norm(self.dfdx.numpy())
                    stats["residual"].append(r)

        elif self.mode == "nesterov":
            wp.copy(self.x_prev, x)

            # momentum index (reset after restart)
            b = 0

            for _ in range(max_iters):
                beta = (b - 1.0) / (b + 2.0)
                b += 1

                # y = x + beta*(x - x_prev)
                wp.launch(kernel=nesterov1, dim=self.n, inputs=[beta, x, self.x_prev, self.y], device=self.device)

                # compute residual at the lookahead point
                grad_func(self.y, self.dfdx)

                # an adaptive restart test would use inner() here to evaluate
                # np.dot(dfdx, x - x_prev) and reset b when it is positive

                # x = y - alpha*dfdx
                wp.launch(
                    kernel=nesterov2,
                    dim=self.n,
                    inputs=[alpha, None, None, x, self.x_prev, self.y, self.dfdx],
                    device=self.device,
                )

                if report:
                    stats["evals"] += 1
                    r = np.linalg.norm(self.dfdx.numpy())
                    stats["residual"].append(r)

        else:
            raise RuntimeError("Unknown optimizer")

        if report:
            print(stats)
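

# ----------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module). It minimizes the
# quadratic f(x) = 0.5 * ||x - target||^2, whose gradient is df/dx = x - target;
# the kernel and variable names below are assumptions made for this demo only.
@wp.kernel
def _demo_grad(x: wp.array(dtype=float), target: wp.array(dtype=float), dfdx: wp.array(dtype=float)):
    tid = wp.tid()
    dfdx[tid] = x[tid] - target[tid]


if __name__ == "__main__":
    wp.init()

    device = "cpu"
    n = 8

    target = wp.array(np.linspace(0.0, 1.0, n).astype(np.float32), dtype=float, device=device)
    x = wp.zeros(n, dtype=float, device=device)

    # solve() expects a callback of the form grad_func(x, dfdx)
    def grad_func(xs, dfdx):
        wp.launch(kernel=_demo_grad, dim=n, inputs=[xs, target, dfdx], device=device)

    opt = Optimizer(n, mode="gd", device=device)  # mode="nesterov" also works
    opt.solve(x, grad_func, max_iters=100, alpha=0.1, report=True)

    print(x.numpy())  # should be close to target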