Upload infer_and_wait_all.sh with huggingface_hub
Browse files- infer_and_wait_all.sh +27 -0
infer_and_wait_all.sh
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
#!/bin/bash
# Wait until GPU $GPU_ID has more than $THRESHOLD MB of free memory,
# then launch the inference job in the background (see loop below).

# Index of the GPU to watch (row order as printed by nvidia-smi).
readonly GPU_ID=5
# Free-memory threshold in MB (50000 MB ~= 50 GB).
readonly THRESHOLD=50000
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# Print the free memory (in MB) of the GPU selected by the global $GPU_ID.
# nvidia-smi emits one CSV row per GPU; select row GPU_ID+1 (rows are 1-based).
get_free_memory() {
  nvidia-smi --query-gpu=memory.free --format=csv,nounits,noheader \
    | awk -v row=$((GPU_ID + 1)) 'NR == row'
}
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# Poll the GPU until enough memory is free, then fire off the job once.
while true; do
  FREE_MEM=$(get_free_memory)

  # Guard against nvidia-smi failing or returning nothing: an empty value
  # would make the numeric test below error out every iteration.
  if [ -z "$FREE_MEM" ]; then
    echo "warning: could not read free memory for GPU $GPU_ID; retrying" >&2
    sleep 10
    continue
  fi

  echo "GPU $GPU_ID free memory: ${FREE_MEM}MB"

  if [ "$FREE_MEM" -gt "$THRESHOLD" ]; then
    # Message fixed: it previously claimed "greater than 40GB" while the
    # actual threshold is $THRESHOLD MB (50000 MB ~= 50 GB).
    echo "Free memory is greater than ${THRESHOLD}MB. Running your script...."

    # nohup bash train_demo_3456.sh > nohup_all_coco_v2_phi35_lre4_e1_0913.out &

    nohup python target_all.py --size 400 --gpus 2 --interval 0.01 &

    break
  fi

  # Poll every 10 seconds. (Original Chinese comment said "every 5 minutes",
  # which did not match the actual sleep interval.)
  sleep 10
done
|
| 27 |
+
|