File size: 2,096 Bytes
1db7196 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import torch
import time
import random
def initialize_and_touch(tensor):
    """Zero-fill ``tensor`` in place and wait for the fill to complete.

    The write visits every element of the tensor (presumably so the
    allocation is really touched and not merely reserved -- the name
    suggests this; confirm with the author).

    Args:
        tensor: Tensor to clear. It is modified in place and may live on
            any device.

    Returns:
        None.
    """
    tensor.zero_()
    # Wait for the fill on the device that actually owns the tensor rather
    # than on whatever device happens to be current. CPU tensors need no
    # CUDA synchronization, so the call is skipped for them.
    if tensor.is_cuda:
        torch.cuda.synchronize(tensor.device)
def dummy_compute(tensor):
    """Multiply ``tensor`` by its own transpose and return the product.

    Args:
        tensor: A tensor of at most two dimensions (a requirement of
            ``Tensor.t()``). For an ``(n, m)`` input the result is
            ``(n, n)``.

    Returns:
        The tensor ``tensor @ tensor.t()``, on the same device as the input.
    """
    result = torch.matmul(tensor, tensor.t())
    # Block until the product has been computed, synchronizing the device
    # that holds the result instead of the current device. CPU results need
    # no CUDA synchronization.
    if result.is_cuda:
        torch.cuda.synchronize(result.device)
    return result
# Query the CUDA device and report how much memory it has in total.
device = torch.device("cuda")
total_memory = torch.cuda.get_device_properties(device).total_memory
print(f"Total VRAM: {total_memory / (1024**3):.2f} GB")

# Bookkeeping for the allocation loop: tensors are kept in a list so they
# stay alive, and allocation stops once 95% of total memory is accounted for.
target = 0.95 * total_memory
allocated = 0
allocated_tensors = []

# Each chunk is a float32 matrix sized to take up about 4 GiB.
element_size = torch.empty(0, dtype=torch.float32).element_size()
chunk_size_bytes = 4 * 2**30  # 4 GiB
chunk_elements = chunk_size_bytes // element_size
# Use a (roughly) square 2-D shape: the side length is the truncated square
# root of the number of elements in a chunk.
side = int(chunk_elements ** 0.5)
# Fill the device with square float32 chunks until the running total reaches
# ``target`` or the device reports that it is out of memory.
print("Allocating and initializing memory...")
while allocated < target:
    # Only the allocation and the zero-fill can run out of memory, so keep
    # the guarded region limited to those two calls.
    try:
        chunk = torch.empty((side, side), dtype=torch.float32, device=device)
        initialize_and_touch(chunk)
    except RuntimeError as e:
        # Out-of-memory errors are RuntimeErrors; anything else is a real
        # failure and is propagated unchanged.
        if 'out of memory' not in str(e).lower():
            raise
        print(f"\nOut of memory after {allocated / (1024**3):.2f} GB")
        break
    allocated_tensors.append(chunk)
    # Count the bytes the tensor really occupies. ``side`` is a truncated
    # square root, so side * side * element_size can be smaller than the
    # nominal chunk size.
    allocated += chunk.numel() * chunk.element_size()
    # The progress line ends in '\r' rather than a newline, so it must be
    # flushed explicitly to show up on a line-buffered terminal.
    print(f"Allocated: {allocated / (1024**3):.2f} GB", end='\r', flush=True)
print(f"\nHolding {allocated / (1024**3):.2f} GB in {len(allocated_tensors)} chunks.")
# Keep the process (and therefore the allocated tensors) alive forever, and
# periodically run a small matmul on one of the held chunks.
print("Running dummy compute every 30 seconds to show GPU utilization...")
compute_interval = 30
# Measure the interval with a monotonic clock so that wall-clock adjustments
# cannot skip or bunch up compute rounds.
last_compute = time.monotonic()
while True:
    now = time.monotonic()
    if now - last_compute >= compute_interval:
        if allocated_tensors:
            t = random.choice(allocated_tensors)
            try:
                # Work on at most an 8000 x 8000 corner of the chunk. A
                # separate name is used so the module-level ``side`` (the
                # chunk side length) is not overwritten.
                edge = min(t.shape[0], 8000)
                # The product is deliberately not bound to a name: it is
                # released as soon as the call returns instead of staying
                # resident until the next round on an almost-full device.
                dummy_compute(t[:edge, :edge])
                print(f"[{time.strftime('%H:%M:%S')}] GPU compute spike (util ↑)")
            except Exception as e:
                # Best effort: a failed round is reported and the loop goes on.
                print(f"Compute failed (expected if chunk too big): {e}")
        last_compute = now
    # Poll once per second between rounds.
    time.sleep(1)
|