File size: 2,096 Bytes
1db7196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Pin this process to a single GPU. NOTE: these environment variables must be
# set before `import torch` so the CUDA runtime sees the restricted device list.
import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # order devices by PCI bus ID (matches nvidia-smi numbering)
os.environ["CUDA_VISIBLE_DEVICES"] = "2"  # expose only physical GPU 2 to this process
import torch
import time
import random



def initialize_and_touch(tensor):
    """Zero-fill *tensor* in place and block until the GPU has finished.

    Writing to every element ensures the freshly allocated VRAM is actually
    committed rather than merely reserved.
    """
    tensor.fill_(0)
    torch.cuda.synchronize()

def dummy_compute(tensor):
    """Run one matmul (tensor @ tensor^T) and wait for it to complete.

    The product is returned but the point of the call is the visible burst
    of GPU utilization, not the result.
    """
    product = tensor @ tensor.t()
    torch.cuda.synchronize()
    return product

device = torch.device("cuda")
total_memory = torch.cuda.get_device_properties(device).total_memory
print(f"Total VRAM: {total_memory / (1024**3):.2f} GB")

allocated_tensors = []
chunk_size_bytes = 4 * 1024**3  # 4 GiB target per chunk
element_size = torch.tensor([], dtype=torch.float32).element_size()
chunk_elements = chunk_size_bytes // element_size

# Make the chunk roughly square so slices of it can be fed to matmul later.
side = int(chunk_elements ** 0.5)

allocated = 0
target = total_memory * 0.95  # leave ~5% headroom (CUDA context, fragmentation)

print("Allocating and initializing memory...")
while allocated < target:
    try:
        # Allocate a 2D tensor and touch it so the VRAM is really committed.
        chunk = torch.empty((side, side), dtype=torch.float32, device=device)
        initialize_and_touch(chunk)
        allocated_tensors.append(chunk)
        # Account for the bytes actually held: side**2 elements is slightly
        # less than the 4 GiB target, so adding chunk_size_bytes would
        # over-report what is resident.
        allocated += chunk.nelement() * chunk.element_size()
        print(f"Allocated: {allocated / (1024**3):.2f} GB", end='\r')
    except RuntimeError as e:
        if 'out of memory' in str(e).lower():
            print(f"\nOut of memory after {allocated / (1024**3):.2f} GB")
            break
        else:
            raise

print(f"\nHolding {allocated / (1024**3):.2f} GB in {len(allocated_tensors)} chunks.")
print("Running dummy compute every 30 seconds to show GPU utilization...")

compute_interval = 30  # seconds between utilization spikes

# Sleep for the whole interval instead of polling every second; also avoid
# shadowing the module-level `side` that was used during allocation.
while True:
    time.sleep(compute_interval)
    if not allocated_tensors:
        continue
    t = random.choice(allocated_tensors)
    try:
        # Cap the matmul operand at 8000x8000 so the temporary result (and
        # any matmul workspace) fits alongside the held chunks.
        dim = min(t.shape[0], 8000)
        _ = dummy_compute(t[:dim, :dim])
        print(f"[{time.strftime('%H:%M:%S')}] GPU compute spike (util ↑)")
    except Exception as e:
        print(f"Compute failed (expected if chunk too big): {e}")