#!/usr/bin/env python3
import numpy as np
import torch
from tqdm import tqdm

try:
    import tinycudann as tcnn
except ImportError as e:
    raise SystemExit(f"This script requires tiny-cuda-nn's PyTorch bindings: {e}")

# This script stress-tests the GPU memory arena of tiny-cuda-nn with randomly
# sized allocations and helped find a bug in its interval arithmetic in the past.


class TcnnFCBlock(tcnn.Network):
    def __init__(
            self,
            in_features: int,
            out_features: int,
            num_hidden_layers: int,
            hidden_features: int,
            activation: str = 'ReLU',
            last_activation: str = 'None',
            seed: int = 42):
        assert hidden_features in [16, 32, 64, 128], \
            "hidden_features can only be 16, 32, 64, or 128."
        super().__init__(in_features, out_features, network_config={
            "otype": "FullyFusedMLP",               # Component type.
            "activation": activation,               # Activation of hidden layers.
            "output_activation": last_activation,   # Activation of the output layer.
            "n_neurons": hidden_features,           # Neurons in each hidden layer.
                                                    # May only be 16, 32, 64, or 128.
            "n_hidden_layers": num_hidden_layers,   # Number of hidden layers.
        }, seed=seed)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Flatten all leading batch dimensions into one, run the fused MLP,
        # then restore the original batch shape.
        prefix = x.shape[:-1]
        return super().forward(x.flatten(0, -2)).unflatten(0, prefix)


device = torch.device('cuda:0')
mlp = TcnnFCBlock(3, 256, 8, 128)

for _ in tqdm(range(10000)):
    # Reset gradients; assigning None frees the old gradient tensors.
    for n, p in mlp.named_parameters():
        p.grad = None

    # A random batch size in [200, 1000) forces a differently sized
    # allocation on every iteration, exercising the memory arena.
    _x = np.random.randint(200, 1000)
    x = torch.rand([_x, 1000, 3], dtype=torch.float, device=device)  # random setting
    #x = torch.rand([torch.randint(200, 800, [1]).item(), 100, 3], dtype=torch.float, device=device)  # setting 2

    y = mlp.forward(x)
    y.mean().backward()
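
# Optional diagnostics (a sketch, not part of the original stress test): report
# peak memory as seen by PyTorch's caching allocator once the loop finishes.
# Note that tiny-cuda-nn manages its internal arena outside of PyTorch, so these
# numbers only cover tensors allocated on the PyTorch side (inputs, outputs, grads).
torch.cuda.synchronize(device)
print(f"Peak PyTorch-allocated memory: {torch.cuda.max_memory_allocated(device) / 2**20:.1f} MiB")
print(f"Peak PyTorch-reserved memory:  {torch.cuda.max_memory_reserved(device) / 2**20:.1f} MiB")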