# Source text of a small PyTorch optimizer module, kept as a raw string so the
# embedded code is stored verbatim (no escape processing).
#
# The embedded module defines:
#   * GradientDescent -- a torch.optim.Optimizer subclass whose step() applies
#     p <- p - lr * p.grad in place to every parameter that has a gradient,
#     optionally re-evaluating a closure (with grad enabled) to obtain the loss.
#   * build_optimizer(params, config) -- a factory that reads "lr" from a
#     config mapping, falling back to 1e-2.
#
# NOTE(review): how CODE is consumed (compiled, exec'd, written to disk, ...)
# is not visible in this file -- confirm against the loader.
CODE = r"""
import torch
from torch.optim import Optimizer


class GradientDescent(Optimizer):
    def __init__(self, params, lr=1e-2):
        if lr < 0:
            raise ValueError(f"Invalid learning rate: {lr}")
        defaults = dict(lr=lr)
        super().__init__(params, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()
        for group in self.param_groups:
            lr = group["lr"]
            for p in group["params"]:
                if p.grad is None:
                    continue
                p.add_(p.grad, alpha=-lr)
        return loss


def build_optimizer(params, config):
    return GradientDescent(params, lr=config.get("lr", 1e-2))
"""

# Default configuration; the "lr" value matches both the constructor default
# and the fallback used by build_optimizer inside CODE.
DEFAULT_CONFIG = {"lr": 1e-2}

# One-line human-readable summary of the update rule implemented by CODE.
DESCRIPTION = "Plain gradient descent: parameter <- parameter - lr * gradient."