"""Occupy ~85% of one GPU's VRAM and hold it until the process is killed.

Usage: python hold_vram.py --g <gpu_id>   (IDs as reported by nvidia-smi)
"""
import os
import json
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--g", type=str, default="2", help="GPU ID")
args = parser.parse_args()

# CUDA_VISIBLE_DEVICES must be set BEFORE torch is imported, otherwise the
# selection is ignored — this is why torch is deliberately imported below.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.g)

import torch
import time

# Total memory in bytes of the single visible device (device index 0 after
# CUDA_VISIBLE_DEVICES filtering). ~85e9 on an A100 80GB, but trust the
# driver-reported value.
total_memory = torch.cuda.get_device_properties(0).total_memory

# Keep a live reference to every chunk so the caching allocator cannot free them.
allocated_tensors = []

# Allocate in 4 GiB chunks to reduce fragmentation-induced OOM (adjust if needed).
chunk_size_bytes = 4 * 1024**3  # 4 GiB
# element_size() of float32 is 4 bytes; derive it rather than hard-coding.
chunk_elements = chunk_size_bytes // torch.tensor([], dtype=torch.float32).element_size()

# Hoisted out of the try so the except branch can always read it.
allocated = 0
try:
    # Stop at ~85% of the card to leave headroom for the CUDA context etc.
    while allocated < total_memory * 0.85:
        chunk = torch.empty(chunk_elements, dtype=torch.float32, device='cuda')
        allocated_tensors.append(chunk)
        allocated += chunk_size_bytes
        # Touch the memory to force the allocation to be materialized.
        chunk.zero_()
        torch.cuda.synchronize()
except RuntimeError as e:
    if 'out of memory' in str(e).lower():
        print(f"Allocated approximately {allocated / (1024**3):.2f} GB. Holding VRAM on A100.")
    else:
        # Not an OOM — re-raise with the original traceback intact.
        raise

# Hold the memory indefinitely
print("VRAM occupied. Running forever to hold it.")
while True:
    time.sleep(3600)  # Sleep 1 hour to minimize CPU usage; holds VRAM until killed