import os

# Select the GPU by physical PCI bus position and expose only device 2.
# NOTE: these environment variables must be set BEFORE `import torch` —
# the CUDA runtime reads them once, at initialization.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
import torch
import time
import random
|
|
|
|
|
|
def initialize_and_touch(tensor):
    """Zero-fill ``tensor`` in place so its backing memory is committed.

    CUDA allocations are lazy until first write; zeroing forces the pages
    to be physically backed. For CUDA tensors a device synchronize follows
    so the write has completed before the caller counts the chunk as held.
    CPU tensors skip the synchronize (the original raised on CPU-only
    builds because it called ``torch.cuda.synchronize()`` unconditionally).

    Args:
        tensor: any torch tensor; modified in place, nothing returned.
    """
    tensor.zero_()
    if tensor.is_cuda:  # synchronizing without CUDA raises; guard it
        torch.cuda.synchronize()
|
|
def dummy_compute(tensor):
    """Run a throwaway matmul to generate load: ``tensor @ tensor.T``.

    For CUDA tensors, synchronizes afterwards so the kernel has actually
    finished (and registers in utilization counters) before returning.
    CPU tensors skip the synchronize (the original raised on CPU-only
    builds because it called ``torch.cuda.synchronize()`` unconditionally).

    Args:
        tensor: a 2-D tensor of shape (n, m).

    Returns:
        The (n, n) product ``tensor @ tensor.T``.
    """
    result = torch.matmul(tensor, tensor.t())
    if result.is_cuda:  # synchronizing without CUDA raises; guard it
        torch.cuda.synchronize()
    return result
|
|
# --- VRAM sizing --------------------------------------------------------
device = torch.device("cuda")
total_memory = torch.cuda.get_device_properties(device).total_memory
print(f"Total VRAM: {total_memory / (1024**3):.2f} GB")


allocated_tensors = []  # keeps every chunk alive so the memory stays held
chunk_size_bytes = 4 * 1024**3  # nominal 4 GiB per chunk
element_size = torch.tensor([], dtype=torch.float32).element_size()  # 4 bytes for float32
chunk_elements = chunk_size_bytes // element_size


# Side of a square chunk tensor: floor(sqrt(chunk_elements)), so each chunk
# is side*side elements — slightly UNDER the nominal 4 GiB.
side = int(chunk_elements ** 0.5)


allocated = 0  # running byte count of memory held so far
target = total_memory * 0.95  # leave ~5% headroom to avoid immediate hard OOM
|
|
# Grab ~4 GiB chunks until ~95% of VRAM is held. Each chunk is zero-filled
# so the allocation is physically backed, not just reserved.
print("Allocating and initializing memory...")
while allocated < target:
    try:
        chunk = torch.empty((side, side), dtype=torch.float32, device=device)
        initialize_and_touch(chunk)
        allocated_tensors.append(chunk)
        # Count the bytes actually held (side*side elements), not the nominal
        # chunk_size_bytes — side = floor(sqrt(...)) makes each chunk slightly
        # smaller than 4 GiB, so the old accounting overstated usage.
        allocated += chunk.numel() * chunk.element_size()
        # flush so the carriage-return progress line is visible immediately
        print(f"Allocated: {allocated / (1024**3):.2f} GB", end='\r', flush=True)
    except RuntimeError as e:
        # CUDA OOM surfaces as RuntimeError with "out of memory" in the text;
        # anything else is a real error and is re-raised.
        if 'out of memory' in str(e).lower():
            print(f"\nOut of memory after {allocated / (1024**3):.2f} GB")
            break
        else:
            raise
|
|
print(f"\nHolding {allocated / (1024**3):.2f} GB in {len(allocated_tensors)} chunks.")
print("Running dummy compute every 30 seconds to show GPU utilization...")


compute_interval = 30  # seconds between utilization spikes
last_compute = time.time()


# Poll once a second; every `compute_interval` seconds run a matmul on a
# sub-slice of a random held chunk so monitoring tools report non-zero
# utilization. Runs until killed.
while True:
    now = time.time()
    if now - last_compute >= compute_interval:
        if allocated_tensors:
            t = random.choice(allocated_tensors)
            try:
                # Cap the slice at 8000x8000 so the matmul stays quick.
                # Local name `sub` — the original rebound the module-level
                # `side` here, shadowing the allocation-size global.
                sub = min(t.shape[0], 8000)
                _ = dummy_compute(t[:sub, :sub])
                print(f"[{time.strftime('%H:%M:%S')}] GPU compute spike (util ↑)")
            except RuntimeError as e:
                # Narrowed from `except Exception`: a too-large matmul fails
                # with a CUDA OOM RuntimeError; other exceptions should not
                # be silently swallowed.
                print(f"Compute failed (expected if chunk too big): {e}")
        last_compute = now


    time.sleep(1)
|
|