"""Allocate and hold a fixed amount of GPU memory (e.g. to simulate VRAM pressure)."""

import time

import torch

GB = 1024 ** 3  # bytes per GiB

# Single source of truth for sizing/placement — the original hard-coded the
# device index in two places ("cuda:1" and set_device(1)), which could drift.
TARGET_GB = 28
DEVICE_INDEX = 1


def hold_gpu_memory(target_gb: int = TARGET_GB, device_index: int = DEVICE_INDEX) -> None:
    """Allocate ``target_gb`` GiB on ``cuda:<device_index>`` and hold it until Ctrl+C.

    Args:
        target_gb: Number of GiB to allocate (exact byte count, see note below).
        device_index: CUDA device ordinal to allocate on.

    Raises:
        RuntimeError: If CUDA is unavailable or the device lacks free memory.
    """
    device = f"cuda:{device_index}"
    torch.cuda.init()
    torch.cuda.set_device(device_index)

    # uint8 = 1 byte/element => element count equals byte count, so the
    # allocation size is exact.
    n_bytes = target_gb * GB
    x = torch.empty(n_bytes, dtype=torch.uint8, device=device)

    # Touch the memory so CUDA actually commits pages rather than lazily
    # reserving address space.
    x.fill_(1)
    torch.cuda.synchronize()

    print(f"Allocated: {x.numel()/GB:.2f} GB on {device}")
    print("Holding allocation... Ctrl+C to exit")
    try:
        # Sleep in long intervals; the allocation is held by `x` staying alive.
        while True:
            time.sleep(60)
    except KeyboardInterrupt:
        # Exit cleanly instead of dumping a traceback — the banner invites Ctrl+C.
        pass


if __name__ == "__main__":
    hold_gpu_memory()