"""Occupy ~85% of one GPU's VRAM and hold it until the process is killed.

Usage: python hold_vram.py --g <gpu_id>   (IDs as reported by nvidia-smi)
"""
import os
import json
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--g", type=str, default="2", help="GPU ID")
args = parser.parse_args()

# CUDA_VISIBLE_DEVICES must be set BEFORE torch is imported, otherwise the
# selection is ignored — this is why torch is deliberately imported below.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.g)

import torch
import time

# Total memory in bytes of the single visible device (device index 0 after
# CUDA_VISIBLE_DEVICES filtering). ~85e9 on an A100 80GB, but trust the
# driver-reported value.
total_memory = torch.cuda.get_device_properties(0).total_memory

# Keep a live reference to every chunk so the caching allocator cannot free them.
allocated_tensors = []

# Allocate in 4 GiB chunks to reduce fragmentation-induced OOM (adjust if needed).
chunk_size_bytes = 4 * 1024**3  # 4 GiB
# element_size() of float32 is 4 bytes; derive it rather than hard-coding.
chunk_elements = chunk_size_bytes // torch.tensor([], dtype=torch.float32).element_size()

# Hoisted out of the try so the except branch can always read it.
allocated = 0
try:
    # Stop at ~85% of the card to leave headroom for the CUDA context etc.
    while allocated < total_memory * 0.85:
        chunk = torch.empty(chunk_elements, dtype=torch.float32, device='cuda')
        allocated_tensors.append(chunk)
        allocated += chunk_size_bytes
        # Touch the memory to force the allocation to be materialized.
        chunk.zero_()
        torch.cuda.synchronize()
except RuntimeError as e:
    if 'out of memory' in str(e).lower():
        print(f"Allocated approximately {allocated / (1024**3):.2f} GB. Holding VRAM on A100.")
    else:
        # Not an OOM — re-raise with the original traceback intact.
        raise

# Hold the memory indefinitely
print("VRAM occupied. Running forever to hold it.")
while True:
    time.sleep(3600)  # Sleep 1 hour to minimize CPU usage; holds VRAM until killed