Wendy-Fly committed on
Commit
3451841
·
verified ·
1 Parent(s): 281da15

Upload 2 files

Browse files
Files changed (2) hide show
  1. infer_and_wait_all.sh +87 -0
  2. target_all.py +101 -0
infer_and_wait_all.sh ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# infer_and_wait_all.sh - reserve GPU memory on the selected cards and keep
# the allocation alive until interrupted (Ctrl+C).
#
# Usage:
#   ./infer_and_wait_all.sh              # default: occupy the last 4 GPUs
#   ./infer_and_wait_all.sh 0 1 2 3      # occupy the listed GPUs
#   ./infer_and_wait_all.sh -s 70 4 5    # occupy 70 GB on each listed GPU
#   ./infer_and_wait_all.sh -w 50000 0   # wait until the GPU has >= 50000 MB free
#
# Flags:
#   -s, --size      memory to occupy per GPU in GB (default: 95% of free memory)
#   -w, --wait      free-memory threshold in MB to wait for (default: 0 = no wait)
#   -i, --interval  keep-alive refresh interval in seconds (default: 0.5)
#   -h, --help      show this help and exit
set -euo pipefail

readonly DECIMAL_RE='^([0-9]+([.][0-9]*)?|[.][0-9]+)$'
readonly INTEGER_RE='^[0-9]+$'

# Print an error message to stderr and abort.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Print the help text to stdout.
usage() {
  cat <<'USAGE'
Usage:
  ./infer_and_wait_all.sh              # default: occupy the last 4 GPUs
  ./infer_and_wait_all.sh 0 1 2 3      # occupy the listed GPUs
  ./infer_and_wait_all.sh -s 70 4 5    # occupy 70 GB on each listed GPU
  ./infer_and_wait_all.sh -w 50000 0   # wait until the GPU has >= 50000 MB free

Flags:
  -s, --size      memory to occupy per GPU in GB (default: 95% of free memory)
  -w, --wait      free-memory threshold in MB to wait for (default: 0 = no wait)
  -i, --interval  keep-alive refresh interval in seconds (default: 0.5)
  -h, --help      show this help and exit
USAGE
}

# Abort unless the option in $2 is followed by a value.
# Arguments: $1 - number of remaining arguments, $2 - option name
require_value() {
  (( $1 >= 2 )) || die "option $2 requires a value"
}

# Print the default GPU selection, one index per line: every GPU when there
# are at most 4, otherwise the last 4 as enumerated by nvidia-smi.
default_gpu_ids() {
  local listing total first i
  command -v nvidia-smi >/dev/null \
    || die "nvidia-smi not found; pass GPU ids explicitly"
  listing=$(nvidia-smi --query-gpu=index --format=csv,noheader) \
    || die "nvidia-smi failed to list GPUs"
  [[ -n "$listing" ]] || die "nvidia-smi reported no GPUs"
  total=$(grep -c '' <<<"$listing")
  first=$(( total > 4 ? total - 4 : 0 ))
  for (( i = first; i < total; i++ )); do
    printf '%s\n' "$i"
  done
}

# Print the free memory (MB) of one GPU, aborting on any unusable reading so
# that a failed query is never mistaken for "enough memory is free".
# Arguments: $1 - GPU id understood by `nvidia-smi -i`
query_free_mb() {
  local gpu=$1 free
  free=$(nvidia-smi -i "$gpu" --query-gpu=memory.free \
    --format=csv,noheader,nounits) \
    || die "nvidia-smi could not query GPU $gpu"
  free=${free//[[:space:]]/}
  [[ "$free" =~ $INTEGER_RE ]] \
    || die "unexpected free-memory value for GPU $gpu: '$free'"
  printf '%s\n' "$free"
}

main() {
  local size_gb="" # empty = occupy 95% of the free memory
  local wait_mb=0
  local interval=0.5

  while (( $# > 0 )) && [[ "$1" == -* ]]; do
    case "$1" in
      -s | --size)
        require_value "$#" "$1"
        size_gb=$2
        shift 2
        ;;
      -w | --wait)
        require_value "$#" "$1"
        wait_mb=$2
        shift 2
        ;;
      -i | --interval)
        require_value "$#" "$1"
        interval=$2
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      --)
        shift
        break
        ;;
      *)
        die "Unknown option: $1"
        ;;
    esac
  done

  # Reject malformed numbers up front instead of failing later inside Python
  # or silently skipping the wait.
  if [[ -n "$size_gb" && ! "$size_gb" =~ $DECIMAL_RE ]]; then
    die "--size must be a non-negative number of GB, got '$size_gb'"
  fi
  [[ "$interval" =~ $DECIMAL_RE ]] \
    || die "--interval must be a non-negative number of seconds, got '$interval'"
  [[ "$wait_mb" =~ $INTEGER_RE ]] \
    || die "--wait must be a non-negative integer number of MB, got '$wait_mb'"
  wait_mb=$(( 10#$wait_mb )) # force base 10 so leading zeros are not octal

  # Decide which GPUs to occupy. Explicit ids may be separated by spaces or
  # commas, whether given as separate arguments or inside a single one.
  local -a gpu_ids=()
  if (( $# == 0 )); then
    local listing
    listing=$(default_gpu_ids) || exit 1
    mapfile -t gpu_ids <<<"$listing"
  else
    local arg part
    local -a parts
    for arg in "$@"; do
      parts=()
      IFS=', ' read -r -a parts <<<"$arg"
      if (( ${#parts[@]} > 0 )); then
        for part in "${parts[@]}"; do
          if [[ -n "$part" ]]; then
            gpu_ids+=("$part")
          fi
        done
      fi
    done
  fi
  (( ${#gpu_ids[@]} > 0 )) || die "no GPU ids given"

  local gpu_list
  gpu_list=$(IFS=,; printf '%s' "${gpu_ids[*]}")
  echo "Target GPUs: $gpu_list"

  # Optional: wait until every target GPU has enough free memory.
  if (( wait_mb > 0 )); then
    command -v nvidia-smi >/dev/null || die "nvidia-smi not found; cannot wait"
    echo "Waiting until each GPU has >= ${wait_mb} MB free..."
    local all_ok g free
    while true; do
      all_ok=1
      for g in "${gpu_ids[@]}"; do
        free=$(query_free_mb "$g") || exit 1
        echo "  GPU $g free: ${free} MB"
        if (( free < wait_mb )); then
          all_ok=0
        fi
      done
      if (( all_ok == 1 )); then
        break
      fi
      sleep 10
    done
  fi

  # nvidia-smi enumerates GPUs in PCI bus order; make CUDA use the same order
  # (unless the caller already chose one) so the indices mean the same thing.
  export CUDA_DEVICE_ORDER="${CUDA_DEVICE_ORDER:-PCI_BUS_ID}"
  export CUDA_VISIBLE_DEVICES="$gpu_list"

  # Occupy the GPUs. Settings are passed through the environment and the
  # heredoc delimiter is quoted, so no shell text is spliced into the Python
  # source.
  OCCUPY_SIZE_GB="$size_gb" OCCUPY_INTERVAL="$interval" python - <<'PY'
import os
import time

import torch

gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
n = torch.cuda.device_count()
if n == 0:
    raise SystemExit(
        "No CUDA devices visible for CUDA_VISIBLE_DEVICES="
        + os.environ["CUDA_VISIBLE_DEVICES"]
    )
if n != len(gpu_ids):
    print(f"Warning: requested {len(gpu_ids)} GPU(s) but only {n} are visible to CUDA")

size_gb = os.environ.get("OCCUPY_SIZE_GB", "")
size_gb = float(size_gb) if size_gb else None
interval = float(os.environ.get("OCCUPY_INTERVAL", "0.5"))

tensors = []
for i in range(n):
    free, total = torch.cuda.mem_get_info(i)
    if size_gb is None:
        target = int(free * 0.95)
    else:
        # Never ask for more than 95% of what is currently free.
        target = min(int(size_gb * 1024**3), int(free * 0.95))
    elems = target // 4  # float32 elements
    t = torch.empty(elems, dtype=torch.float32, device=f"cuda:{i}")
    tensors.append(t)
    print(f"GPU {gpu_ids[i]} -> occupied {target/1024**3:.2f} GB / free was {free/1024**3:.2f} GB")

print(f"All {n} GPU(s) occupied. Ctrl+C to release.")
try:
    while True:
        # Touch every tensor periodically so the process keeps using the GPUs.
        for t in tensors:
            t.add_(0.0)
        time.sleep(interval)
except KeyboardInterrupt:
    print("Released.")
PY
}

main "$@"
target_all.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Keep GPUs at high power draw AND filled memory.
3
+
4
+ Examples:
5
+ python target_all.py # 所有可见卡,fp16,吃满显存
6
+ python target_all.py --gpus 4 5 6 7 # 指定卡
7
+ python target_all.py --gpus 4 5 --mem-gb 60 # 每张卡只占 60GB
8
+ python target_all.py --dtype bf16 # 用 bf16(A100/H100 推荐)
9
+ python target_all.py --no-ballast # 只烧算力,不占显存
10
+ """
11
+ import argparse
12
+ import threading
13
+ import time
14
+
15
+ import torch
16
+
17
+
18
# Maps each accepted --dtype choice to the torch dtype used for the matmul operands.
DTYPES = dict(fp16=torch.float16, bf16=torch.bfloat16, fp32=torch.float32)
# Size in bytes of one element of each supported dtype.
BYTES = dict.fromkeys((torch.float16, torch.bfloat16), 2)
BYTES[torch.float32] = 4
20
+
21
+
22
def parse_args():
    """Parse the command-line options of the GPU burner.

    Returns:
        argparse.Namespace with the attributes ``gpus``, ``mem_frac``,
        ``mem_gb``, ``no_ballast``, ``dtype``, ``matmul_size`` and
        ``sync_every``.
    """
    parser = argparse.ArgumentParser()
    # (flag, add_argument keyword arguments), registered in this order so the
    # generated --help output keeps the same layout.
    option_table = (
        ("--gpus", dict(type=int, nargs="+", default=None,
                        help="GPU IDs,默认所有可见卡")),
        ("--mem-frac", dict(type=float, default=0.9,
                            help="ballast 占空闲显存的比例 (默认 0.9)")),
        ("--mem-gb", dict(type=float, default=None,
                          help="覆盖 mem-frac,每张卡固定占 N GB")),
        ("--no-ballast", dict(action="store_true",
                              help="不占显存,只烧算力")),
        ("--dtype", dict(choices=list(DTYPES), default="fp16",
                         help="matmul 数据类型,fp16/bf16 走 tensor core 功率最高")),
        ("--matmul-size", dict(type=int, default=8192,
                               help="hot loop 矩阵边长,默认 8192")),
        ("--sync-every", dict(type=int, default=64,
                              help="每 N 个 matmul 同步一次,避免队列爆显存")),
    )
    for flag, options in option_table:
        parser.add_argument(flag, **options)
    return parser.parse_args()
39
+
40
+
41
def worker(gpu: int, args):
    """Keep one GPU busy with back-to-back matmuls; never returns.

    Args:
        gpu: CUDA device index to run on.
        args: parsed options; reads ``dtype``, ``matmul_size``,
            ``no_ballast``, ``mem_gb``, ``mem_frac`` and ``sync_every``.
    """
    torch.cuda.set_device(gpu)
    operand_dtype = DTYPES[args.dtype]
    side = args.matmul_size

    # Step 1: allocate the matmul operands first, so they are placed before
    # the ballast claims the remaining memory.
    A = torch.randn(side, side, device=gpu, dtype=operand_dtype)
    B = torch.randn(side, side, device=gpu, dtype=operand_dtype)
    C = torch.empty_like(A)

    # Step 2: optional ballast tensor that pins most of the remaining memory.
    # The reference is kept only so the allocation stays alive.
    ballast = None
    if not args.no_ballast:
        free_bytes = torch.cuda.mem_get_info(gpu)[0]
        if args.mem_gb is None:
            budget = int(free_bytes * args.mem_frac)
        else:
            budget = min(int(args.mem_gb * 1024 ** 3), int(free_bytes * 0.95))
        # Leave 256 MB of headroom below the computed budget.
        budget = max(0, budget - 256 * 1024 * 1024)
        if budget > 0:
            ballast = torch.empty(budget // 4, dtype=torch.float32, device=gpu)

    free_after, total = torch.cuda.mem_get_info(gpu)
    gib = 1024 ** 3
    used_gb = (total - free_after) / gib
    total_gb = total / gib
    print(f"[GPU {gpu}] dtype={args.dtype} matmul={side}x{side} "
          f"mem={used_gb:.1f}/{total_gb:.1f} GB — burning...")

    # Step 3: hot loop. Launch matmuls continuously and synchronize every
    # ``sync_every`` launches so the pending-kernel queue cannot grow unbounded.
    launched = 0
    while True:
        torch.matmul(A, B, out=C)
        launched += 1
        if launched % args.sync_every == 0:
            torch.cuda.synchronize(gpu)
76
+
77
+
78
def main():
    """Start one burner thread per target GPU and supervise them until Ctrl+C.

    Raises:
        SystemExit: if no CUDA GPU is visible, if a requested GPU id is out
            of range, or once every worker thread has terminated (a worker
            only ends by raising, e.g. on an out-of-memory error).
    """
    args = parse_args()
    visible = torch.cuda.device_count()
    if args.gpus is None:
        args.gpus = list(range(visible))
    if not args.gpus:
        raise SystemExit("No CUDA GPUs available")

    # Fail fast on bad ids instead of letting the worker thread die later
    # while the main thread keeps sleeping.
    out_of_range = [g for g in args.gpus if not 0 <= g < visible]
    if out_of_range:
        raise SystemExit(
            f"Invalid GPU id(s) {out_of_range}: {visible} CUDA device(s) visible"
        )

    # allow_tf32 lets fp32 matmuls use TF32 tensor cores; cudnn.benchmark only
    # affects cuDNN algorithm selection and is kept for parity with the
    # original settings.
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.benchmark = True

    print(f"Target GPUs: {args.gpus}")
    running = []
    for g in args.gpus:
        thread = threading.Thread(
            target=worker, args=(g, args), daemon=True, name=f"GPU {g}"
        )
        thread.start()
        running.append(thread)

    try:
        while True:
            time.sleep(5)
            # Workers loop forever, so a finished thread means it crashed;
            # its traceback has already been printed by the threading module.
            dead = [t.name for t in running if not t.is_alive()]
            running = [t for t in running if t.is_alive()]
            if dead:
                print(f"Worker(s) exited unexpectedly: {', '.join(dead)}")
            if not running:
                raise SystemExit("All GPU workers have exited; nothing left to run")
    except KeyboardInterrupt:
        print("\nStopped.")


if __name__ == "__main__":
    main()