"""Occupy most of one GPU's VRAM and hold it until the process is killed.

Useful for reserving a GPU on a shared machine. Pass the GPU index with
--g (as shown by nvidia-smi); the script allocates memory in fixed-size
chunks up to a target fraction of the device's total memory, then sleeps
forever so the allocations stay resident.
"""
import os
import json
import argparse

# Parse the target GPU *before* importing torch: CUDA_VISIBLE_DEVICES must
# be set prior to CUDA initialization for it to take effect.
parser = argparse.ArgumentParser()
parser.add_argument("--g", type=str, default="2", help="GPU ID")
args = parser.parse_args()

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # index GPUs the way nvidia-smi does
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.g)

import torch  # noqa: E402 -- must come after the env vars above
import time   # noqa: E402

# Fraction of the device's total memory to claim. Leaves headroom so the
# driver / CUDA context and other small allocations don't OOM.
# NOTE: the original comment said 95% while the code used 0.85 -- this
# constant is now the single source of truth.
TARGET_FRACTION = 0.85
# Allocate in 4 GiB chunks to reduce the chance of fragmentation failures.
CHUNK_SIZE_BYTES = 4 * 1024**3

# Device 0 here is the first *visible* device, i.e. the one selected via
# CUDA_VISIBLE_DEVICES above.
total_memory = torch.cuda.get_device_properties(0).total_memory
# Number of float32 elements per chunk (element_size() is 4 bytes).
chunk_elements = CHUNK_SIZE_BYTES // torch.tensor([], dtype=torch.float32).element_size()

allocated_tensors = []  # keep references so the chunks stay resident
allocated = 0
try:
    while allocated < total_memory * TARGET_FRACTION:
        chunk = torch.empty(chunk_elements, dtype=torch.float32, device='cuda')
        allocated_tensors.append(chunk)
        allocated += CHUNK_SIZE_BYTES
        chunk.zero_()  # touch the memory to force physical allocation
    # One synchronize after the loop is enough to flush all zero_() kernels.
    torch.cuda.synchronize()
except RuntimeError as e:
    if 'out of memory' in str(e).lower():
        # Hit the hard limit before reaching the target; keep what we got.
        print(f"Allocated approximately {allocated / (1024**3):.2f} GB before OOM. Holding VRAM.")
    else:
        raise  # bare raise preserves the original traceback

# Report the amount on the success path too (the original only printed it on OOM).
print(f"VRAM occupied ({allocated / (1024**3):.2f} GB). Running forever to hold it.")
while True:
    time.sleep(3600)  # wake rarely to minimize CPU usage; holds until killed