"""Allocate and hold a fixed amount of GPU memory (e.g. to simulate VRAM pressure)."""

import time

import torch

GB = 1024 ** 3  # bytes per GiB

# Single source of truth for sizing/placement — the original hard-coded the
# device index in two places ("cuda:1" and set_device(1)), which could drift.
TARGET_GB = 28
DEVICE_INDEX = 1


def hold_gpu_memory(target_gb: int = TARGET_GB, device_index: int = DEVICE_INDEX) -> None:
    """Allocate ``target_gb`` GiB on ``cuda:<device_index>`` and hold it until Ctrl+C.

    Args:
        target_gb: Number of GiB to allocate (exact byte count, see note below).
        device_index: CUDA device ordinal to allocate on.

    Raises:
        RuntimeError: If CUDA is unavailable or the device lacks free memory.
    """
    device = f"cuda:{device_index}"
    torch.cuda.init()
    torch.cuda.set_device(device_index)

    # uint8 = 1 byte/element => element count equals byte count, so the
    # allocation size is exact.
    n_bytes = target_gb * GB
    x = torch.empty(n_bytes, dtype=torch.uint8, device=device)

    # Touch the memory so CUDA actually commits pages rather than lazily
    # reserving address space.
    x.fill_(1)
    torch.cuda.synchronize()

    print(f"Allocated: {x.numel()/GB:.2f} GB on {device}")
    print("Holding allocation... Ctrl+C to exit")
    try:
        # Sleep in long intervals; the allocation is held by `x` staying alive.
        while True:
            time.sleep(60)
    except KeyboardInterrupt:
        # Exit cleanly instead of dumping a traceback — the banner invites Ctrl+C.
        pass


if __name__ == "__main__":
    hold_gpu_memory()