Spaces:
Runtime error
Runtime error
Yixin Liu
committed on
Commit
·
bb690a1
1
Parent(s):
f1afb35
upload
Browse files- __pycache__/config.cpython-39.pyc +0 -0
- __pycache__/parse_code.cpython-39.pyc +0 -0
- config.py +107 -0
- gpu_utility.sh +7 -7
- main.py +139 -122
- parse_code.py +61 -0
- res/20230615-17h44m58s/gpu_utility.sh +118 -0
- res/20230615-17h44m58s/script.sh +29 -0
- res/20230615-17h45m38s/gpu_utility.sh +118 -0
- res/20230615-17h45m38s/script.sh +29 -0
- res/20230615-17h46m42s/gpu_utility.sh +118 -0
- res/20230615-17h46m42s/script.sh +29 -0
- res/20230615-17h48m58s.zip +0 -0
- res/20230615-17h48m58s/gpu_utility.sh +118 -0
- res/20230615-17h48m58s/script.sh +29 -0
- res/20230615-17h49m08s.zip +0 -0
- res/20230615-17h49m08s/gpu_utility.sh +118 -0
- res/20230615-17h49m08s/script.sh +29 -0
- res/20230615-17h49m45s/gpu_utility.sh +118 -0
- res/20230615-17h49m45s/script.sh +29 -0
- res/20230615-17h50m13s/gpu_utility.sh +118 -0
- res/20230615-17h50m13s/script.sh +29 -0
- res/20230615-17h50m22s/gpu_utility.sh +118 -0
- res/20230615-17h50m22s/script.sh +29 -0
- res/20230615-17h50m57s/gpu_utility.sh +118 -0
- res/20230615-17h50m57s/script.sh +29 -0
- res/20230615-17h51m33s/gpu_utility.sh +118 -0
- res/20230615-17h51m33s/script.sh +29 -0
- res/20230615-17h51m43s/gpu_utility.sh +118 -0
- res/20230615-17h51m43s/script.sh +29 -0
- res/20230615-17h53m28s/gpu_utility.sh +118 -0
- res/20230615-17h53m28s/script.sh +25 -0
- res/20230615-17h53m44s/gpu_utility.sh +118 -0
- res/20230615-17h53m44s/script.sh +24 -0
- res/20230615-17h55m17s/gpu_utility.sh +118 -0
- res/20230615-17h55m17s/script.sh +23 -0
- res/20230615-17h56m22s/gpu_utility.sh +118 -0
- res/20230615-17h56m22s/script.sh +23 -0
__pycache__/config.cpython-39.pyc
ADDED
|
Binary file (5.37 kB). View file
|
|
|
__pycache__/parse_code.cpython-39.pyc
ADDED
|
Binary file (1.61 kB). View file
|
|
|
config.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Shell statement placed in front of every experiment command of the generated
# script; it invokes the function whose source is stored in `update_device_func`.
update_device_command = "update_device_idx;\n"

# Bash source of `update_device_idx`. It is written verbatim into the generated
# gpu_utility.sh, after the configuration variables it reads.
#
# Fixes relative to the previous text:
#   * the GPU memory check used the undefined name `total_aviable`, which made
#     the threshold negative so every device was reported as fully booked;
#   * the constrain_* flags are accepted as `true` or `True`, because the
#     generator formats Python booleans;
#   * `username_mine` falls back to `username` (the name the generator writes);
#   * a non-numeric `constrain_rate` no longer breaks the bc arithmetic;
#   * percentages are scaled before dividing so they are not truncated to
#     whole percents.
update_device_func = """
# Wait until CPU / memory limits allow another job, then select the next GPU.
# Reads:  constrain_total, constrain_mine, constrain_rate, cpu_mean_max,
#         memory_rate_max, username_mine (or username), available_devices,
#         total_gpu_memory, max_gpu_memory_gap, max_gpu_utilization
# Writes: current_device_idx, device (scratch variables below are global too)
function update_device_idx {
    # accept both shell-style "true" and Python-style "True"
    if [[ "$constrain_total" == [Tt]rue ]]; then
        # check total cpu usage
        while true; do
            cpu_mean_1=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
            sleep 1
            cpu_mean_2=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
            sleep 1
            cpu_mean_3=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
            cpu_mean=$(echo "scale=2; ($cpu_mean_1+$cpu_mean_2+$cpu_mean_3)/3" | bc)

            # if currently cpu usage is less than the threshold, then break
            if [ "$(echo "$cpu_mean < $cpu_mean_max" | bc)" -eq 1 ]; then
                echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
                break
            else
                echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
                sleep 10
            fi
        done;

        # check total memory usage
        while true; do
            # get memory usage of whole system (average of three samples, in MiB)
            mem_used_1=$(free -m | awk '/Mem:/ {print $3}')
            sleep 1
            mem_used_2=$(free -m | awk '/Mem:/ {print $3}')
            sleep 1
            mem_used_3=$(free -m | awk '/Mem:/ {print $3}')
            mem_used=$(echo "scale=2; ($mem_used_1+$mem_used_2+$mem_used_3)/3" | bc)

            # get rate of memory usage; multiply first so scale=2 keeps the decimals
            mem_rate=$(echo "scale=2; $mem_used*100/$(free -m | awk '/Mem:/ {print $2}')" | bc)
            if [ "$(echo "$mem_rate < $memory_rate_max" | bc)" -eq 1 ]; then
                echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
                break
            else
                echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
                sleep 10
            fi
        done;
    fi;

    # if constrain_mine
    if [[ "$constrain_mine" == [Tt]rue ]]; then

        # constrain_rate is used as a divisor; fall back to 2 when it is not a positive integer
        case "$constrain_rate" in
            ''|*[!0-9]*|0)
                echo "constrain_rate '$constrain_rate' is not a positive integer, using 2" >&2
                rate=2 ;;
            *)
                rate=$constrain_rate ;;
        esac

        # check my own cpu and memory usage, it should be less than 1/$constrain_rate of the given cpu_mean_max / memory_rate_max
        while true; do
            # prefer username_mine, then the configured username, then the current user
            username=${username_mine:-${username:-$(id -un)}}
            cpu_usage_user_sum=$(ps -u "$username" -o %cpu | awk '{sum+=$1} END {print sum}')
            # every core contributes 100 (%) to the total budget
            total_aviable_cpu=$(nproc)
            total_aviable_cpu=$(echo "$total_aviable_cpu*100" | bc)
            cpu_usage_user_ratio=$(echo "scale=2; $cpu_usage_user_sum*100/$total_aviable_cpu" | bc)

            # rss is reported in KiB; convert to MiB to match free -m
            memory_usage_user_sum=$(ps -u "$username" -o rss | awk '{sum+=$1} END {print sum/1024}')
            memory_usage_total=$(free -m | awk '/Mem:/ {print $2}')
            memory_usage_user_ratio=$(echo "scale=2; $memory_usage_user_sum*100/$memory_usage_total" | bc)

            # so my ratio should be less than 1/$constrain_rate of the given threshold
            cpu_mean_max_mine=$(echo "$cpu_mean_max/$rate" | bc)
            memory_rate_max_mine=$(echo "$memory_rate_max/$rate" | bc)
            if [ "$(echo "$cpu_usage_user_ratio < $cpu_mean_max_mine" | bc)" -eq 1 ] && [ "$(echo "$memory_usage_user_ratio < $memory_rate_max_mine" | bc)" -eq 1 ]; then
                echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
                break
            else
                echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds"
                sleep 10
            fi
        done;
    fi;

    # so all the conditions are satisfied, we can update the device idx and run the next experiment
    while true; do
        current_device_idx=$((current_device_idx+1))
        if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
            # wrap around to the first device
            current_device_idx=0
        fi
        # check whether this device is fully booked using nvidia-smi
        useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "${available_devices[$current_device_idx]}")
        utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "${available_devices[$current_device_idx]}")

        # booked when less than max_gpu_memory_gap MiB would remain free, or the GPU is too busy
        if [ "$useage" -ge $((total_gpu_memory-max_gpu_memory_gap)) ] || [ "$utilization" -ge "$max_gpu_utilization" ]; then
            echo "device ${available_devices[$current_device_idx]} is fully booked, try next one"
            sleep 3
            continue
        else
            break
        fi
    done
    echo "current device: ${available_devices[$current_device_idx]}"
    device=${available_devices[$current_device_idx]}
}
"""
|
gpu_utility.sh
CHANGED
|
@@ -1,16 +1,16 @@
|
|
| 1 |
|
| 2 |
-
|
| 3 |
max_gpu_utilization=90
|
| 4 |
-
|
| 5 |
max_gpu_memory_gap=5000
|
| 6 |
-
available_devices=( 0 1 2 3 4
|
| 7 |
current_device_idx=-1
|
| 8 |
-
sleeptime=
|
| 9 |
cpu_mean_max=77
|
| 10 |
memory_rate_max=80
|
| 11 |
-
constrain_total=
|
| 12 |
-
constrain_mine=
|
| 13 |
-
constrain_rate=
|
| 14 |
|
| 15 |
|
| 16 |
|
|
|
|
| 1 |
|
| 2 |
+
username=yila22
|
| 3 |
max_gpu_utilization=90
|
| 4 |
+
total_gpu_memory=24564
|
| 5 |
max_gpu_memory_gap=5000
|
| 6 |
+
available_devices=( 0 1 2 3 4 )
|
| 7 |
current_device_idx=-1
|
| 8 |
+
sleeptime=10
|
| 9 |
cpu_mean_max=77
|
| 10 |
memory_rate_max=80
|
| 11 |
+
constrain_total=True
|
| 12 |
+
constrain_mine=True
|
| 13 |
+
constrain_rate=True
|
| 14 |
|
| 15 |
|
| 16 |
|
main.py
CHANGED
|
@@ -4,144 +4,161 @@ import streamlit as st
|
|
| 4 |
import pandas as pd
|
| 5 |
import numpy as np
|
| 6 |
import time
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
# import plotly.figure_factory as ff
|
| 10 |
-
# import altair as alt
|
| 11 |
-
# from PIL import Image
|
| 12 |
-
# import base64
|
| 13 |
-
# import tarfile
|
| 14 |
-
# import os
|
| 15 |
-
# import requests
|
| 16 |
-
|
| 17 |
|
| 18 |
|
| 19 |
# title
|
| 20 |
st.title("Exp Command Generator")
|
| 21 |
|
| 22 |
# experiment mode
|
| 23 |
-
exp_mode = st.sidebar.selectbox("Select Experiment Mode", ["OneExpOnecard", "MultipleExpOnecard"],key="MultipleExpOnecard")
|
| 24 |
|
| 25 |
## 检查框
|
| 26 |
-
debug = st.sidebar.checkbox("Debug
|
| 27 |
# st.sidebar.write(f"checkbox的值是{res}")
|
| 28 |
|
| 29 |
-
setup = st.sidebar.text_area("Some setup of env at beginning.", """cd $(dirname $(dirname $0))
|
| 30 |
-
source activate xai
|
| 31 |
-
export PYTHONPATH=${PYTHONPATH}:/Users/apple/Desktop/workspace/research_project/attention:/mnt/yixin/:/home/yila22/prj""")
|
| 32 |
-
|
| 33 |
-
exp_hyper = st.sidebar.text_area("Hyperparameters", """exp_name="debug-adv-training-emotion"
|
| 34 |
-
dataset=emotion
|
| 35 |
-
n_epoch=3
|
| 36 |
-
K=3
|
| 37 |
-
encoder=bert
|
| 38 |
-
lambda_1=1
|
| 39 |
-
lambda_2=1
|
| 40 |
-
x_pgd_radius=0.01
|
| 41 |
-
pgd_radius=0.001
|
| 42 |
-
seed=2
|
| 43 |
-
bsize=8
|
| 44 |
-
lr=5e-5""")
|
| 45 |
|
| 46 |
## gpu 相关参数
|
| 47 |
-
gpu_list = st.sidebar.multiselect("multi select", range(10), [
|
| 48 |
# print(gpu_list)
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
g = st.button("Generate")
|
| 92 |
if g:
|
| 93 |
-
s = ""
|
| 94 |
-
s += setup + "\n\n"
|
| 95 |
-
s += exp_hyper + "\n\n"
|
| 96 |
-
s += gpu_hyper + "\n\n"
|
| 97 |
-
s += hyper_loop + "\n\n"
|
| 98 |
-
s += """
|
| 99 |
-
while true; do
|
| 100 |
-
gpu_id=${gpu[$i]}
|
| 101 |
-
# nvidia-smi --query-gpu=utilization.gpu --format=csv -i 2 | grep -Eo "[0-9]+"
|
| 102 |
-
gpu_u=$(nvidia-smi --query-gpu=utilization.gpu --format=csv -i $gpu_id | grep -Eo "[0-9]+")
|
| 103 |
-
free_mem=$(nvidia-smi --query-gpu=memory.free --format=csv -i $gpu_id | grep -Eo "[0-9]+")
|
| 104 |
-
if [[ $free_mem -lt $allow_gpu_memory_threshold || $gpu_u -ge ${gpu_threshold} ]]; then
|
| 105 |
-
i=`expr $i + 1`
|
| 106 |
-
i=`expr $i % $gpunum`
|
| 107 |
-
echo "gpu id ${gpu[$i]} is full loaded, skip"
|
| 108 |
-
if [ "$i" == "0" ]; then
|
| 109 |
-
sleep ${all_full_sleep_time}
|
| 110 |
-
echo "all the gpus are full, sleep 1m"
|
| 111 |
-
fi
|
| 112 |
-
else
|
| 113 |
-
break
|
| 114 |
-
fi
|
| 115 |
-
done
|
| 116 |
-
|
| 117 |
-
gpu_id=${gpu[$i]}
|
| 118 |
-
# search from the next gpu
|
| 119 |
-
i=`expr $i + 1`
|
| 120 |
-
i=`expr $i % $gpunum`
|
| 121 |
-
|
| 122 |
-
free_mem=$(nvidia-smi --query-gpu=memory.free --format=csv -i $gpu_id | grep -Eo "[0-9]+")
|
| 123 |
-
gpu_u=$(nvidia-smi --query-gpu=utilization.gpu --format=csv -i $gpu_id | grep -Eo "[0-9]+")
|
| 124 |
-
export CUDA_VISIBLE_DEVICES=$gpu_id
|
| 125 |
-
echo "use gpu id is ${gpu[$i]}, free memory is $free_mem, it utilization is ${gpu_u}%"
|
| 126 |
-
"""
|
| 127 |
-
s += f"""com="{python_cmd}"\n"""
|
| 128 |
-
s += "echo $com\n"
|
| 129 |
-
s += "echo ==========================================================================================\n"
|
| 130 |
-
if debug:
|
| 131 |
-
s += "$com\n"
|
| 132 |
-
s += "# mkdir -p ./logs/\n"
|
| 133 |
-
s += "# nohup $com > ./logs/$exp_name-$RANDOM.log 2>&1 &\n"
|
| 134 |
-
else:
|
| 135 |
-
s += "# $com\n"
|
| 136 |
-
s += "mkdir -p ./logs/\n"
|
| 137 |
-
s += "nohup $com > ./logs/$exp_name-$RANDOM.log 2>&1 &\n"
|
| 138 |
-
s += """echo "sleep for $sleep_time_after_loading_task to wait the task loaded"
|
| 139 |
-
sleep $sleep_time_after_loading_task\n"""
|
| 140 |
-
s += end_loop
|
| 141 |
st.success("Finished")
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import numpy as np
|
| 6 |
import time
|
| 7 |
+
from config import update_device_func
|
| 8 |
+
from parse_code import parse_base_code
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
# Streamlit page that turns a marked-up "base" shell script into a GPU-aware
# experiment launcher (script.sh) plus the configuration/helper file it sources
# (gpu_utility.sh).

# title
st.title("Exp Command Generator")

# experiment mode
# exp_mode = st.sidebar.selectbox("Select Experiment Mode", ["OneExpOnecard", "MultipleExpOnecard"],key="MultipleExpOnecard")

## checkbox
debug = st.sidebar.checkbox("Debug: 选择则会串行地执行命令", value=True)
# st.sidebar.write(f"checkbox value is {res}")

# setup = st.sidebar.text_area("Some setup of env at beginning.", """cd $(dirname $(dirname $0))
# source activate xai
# export PYTHONPATH=${PYTHONPATH}:/Users/apple/Desktop/workspace/research_project/attention:/mnt/yixin/:/home/yila22/prj""")

# exp_hyper = st.sidebar.text_area("Hyperparameters", """exp_name="debug-adv-training-emotion"
# dataset=emotion
# n_epoch=3
# K=3
# encoder=bert
# lambda_1=1
# lambda_2=1
# x_pgd_radius=0.01
# pgd_radius=0.001
# seed=2
# bsize=8
# lr=5e-5""")

## GPU-related parameters
gpu_list = st.sidebar.multiselect("multi select", range(10), [0,1,2,3,4,])
# print(gpu_list)
allow_gpu_memory_threshold_default=5000
gpu_threshold_default=90
total_gpu_memory = st.sidebar.number_input("单卡总容量", value=24564, min_value=0, max_value=30000, step=1000)
# The default must not exceed max_value, otherwise the widget raises when a
# small total capacity is entered.
max_gpu_memory_gap = st.sidebar.number_input(
    "最小单卡剩余容量",
    value=min(allow_gpu_memory_threshold_default, total_gpu_memory),
    min_value=0,
    max_value=total_gpu_memory,
    step=500,
)
max_gpu_utilization = st.sidebar.number_input("最大单卡利用率", value=gpu_threshold_default, min_value=0, max_value=100, step=10)
sleep_time_after_loading_task= st.sidebar.number_input("加载任务后等待秒数", value=10, min_value=0,step=5)
# all_full_sleep_time = st.sidebar.number_input("全满之后等待秒数", value=20, min_value=0,step=5)
username = st.sidebar.text_input("用户名", value="yila22")
cpu_max_utility = st.sidebar.number_input("cpu最大利用率", value=77, min_value=0, max_value=100, step=1)
memory_max_utility = st.sidebar.number_input("内存最大利用率", value=80, min_value=0, max_value=100, step=1)
constrain_total = st.sidebar.checkbox("限制总资源", value=True)
constrain_mine = st.sidebar.checkbox("限制我的资源", value=False)
constrain_rate = st.sidebar.number_input("限制率", value=2, min_value=1, max_value=10, step=1)

# Shape of the configuration header that is generated below:
# username_mine=root
# max_gpu_utilization=90
# total_gpu_memory=24564
# max_gpu_memory_gap=5000
# available_devices=( 0 1 2 3 4 5 6 7 8 9 )
# current_device_idx=-1
# sleeptime=30
# cpu_mean_max=77
# memory_rate_max=80
# constrain_total=true
# constrain_mine=false
# constrain_rate=2


def _shell_bool(flag):
    """Render a Python truth value the way the shell helper compares it (true/false)."""
    return "true" if flag else "false"


available_devices = " ".join(str(i) for i in gpu_list)
# `username_mine` is the name update_device_idx reads; `username` is kept for
# files/readers that rely on the previous variable name.
setup_for_gpu_utility = f"""
username={username}
username_mine={username}
max_gpu_utilization={max_gpu_utilization}
total_gpu_memory={total_gpu_memory}
max_gpu_memory_gap={max_gpu_memory_gap}
available_devices=( {available_devices} )
current_device_idx=-1
sleeptime={sleep_time_after_loading_task}
cpu_mean_max={cpu_max_utility}
memory_rate_max={memory_max_utility}
constrain_total={_shell_bool(constrain_total)}
constrain_mine={_shell_bool(constrain_mine)}
constrain_rate={constrain_rate}
"""


base_code = st.text_area("Base Code", """##### setup
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
for dataset_idx in 0 1 2; do
#####

##### main
python single_user.py --dataset_idx $dataset_idx --trigger_size 1 --target 0 \
--loc 0 --batch_size 16 --num_epochs 2 --poison_method $poison_method --lr 5e-5 --pattern 0 --exp_name $exp_name \
--log_wb
#####

#####
done;done;
#####""", height=400)


g = st.button("Generate")
if g:
    import os

    st.success("Finished")
    contents = base_code
    # configuration header followed by the shell helper function
    gpu_utility = setup_for_gpu_utility + "\n\n" + update_device_func

    new_code = parse_base_code(contents, debug=debug)

    # keep a timestamped copy of both generated files on the server
    timestr = time.strftime("%Y%m%d-%Hh%Mm%Ss")
    os.makedirs(f"./res/{timestr}", exist_ok=True)
    filename_script = f"./res/{timestr}/script.sh"
    with open(filename_script, "w") as f:
        f.write(new_code)
    filename_config = f"./res/{timestr}/gpu_utility.sh"
    with open(filename_config, "w") as f:
        f.write(gpu_utility)

    # zip them into one file
    # import shutil
    # shutil.make_archive(f"./res/{timestr}", 'zip', f"./res/{timestr}")
    # st.download_button(
    #     label="Download zip",
    #     data=f"./res/{timestr}.zip",
    #     file_name=f"{timestr}.zip",
    #     mime="application/zip",
    # )

    # Offer bare file names: script.sh sources "gpu_utility.sh" from its own
    # directory, so the downloads must not carry the server-side ./res/<ts>/ path.
    st.download_button(
        label="Download script",
        data=new_code,
        file_name=os.path.basename(filename_script),
        mime="text/plain",
    )
    # after clicking i don't want the website to refresh
    st.download_button(
        label="Download gpu_utility.sh",
        data=gpu_utility,
        file_name=os.path.basename(filename_config),
        mime="text/plain",
    )

    # st.markdown(f"### [Download script](./{filename_script})")
    # st.markdown(f"### [Download gpu_utility.sh](P{filename_config})")
    st.markdown("## script.sh")
    st.code(new_code, language="shell")

    st.markdown("## gpu_utility.sh")
    st.code(gpu_utility, language="shell")
|
| 161 |
+
|
| 162 |
+
|
| 163 |
|
| 164 |
|
parse_code.py
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from config import update_device_command
|
| 2 |
+
|
| 3 |
+
def parse_base_code(contents, debug = False):
    """Insert GPU-scheduling calls into a base script delimited by ``#####`` markers.

    ``contents`` is split at every ``#####``. Markers are expected to alternate
    between opening (``##### setup``, ``##### loop``, ``##### main`` or bare)
    and closing, so their count must be even. Text before the first marker and
    from the last marker onwards is not part of the result.

    * setup block  - preceded by a line that changes into the script's
      directory and sources ``gpu_utility.sh``.
    * main block   - preceded by ``update_device_command`` and followed by
      ``sleep $sleeptime``. With ``debug`` true the command text is emitted
      as is (runs in the foreground); otherwise it is wrapped in ``{ ... } &``
      so it runs in the background.
    * anything else is copied unchanged.

    The block type is taken from the opening marker's own line only, so words
    such as "setup" or "main" inside a block body do not change its type.

    Args:
        contents: the marked-up shell text.
        debug: when true, commands are not backgrounded.

    Returns:
        The generated shell script as one string.

    Raises:
        AssertionError: if the number of ``#####`` markers is odd.
    """
    import re

    marker = "#####"
    indexes = [m.start() for m in re.finditer(marker, contents)]
    # Raised explicitly so the check also runs under ``python -O``.
    if len(indexes) % 2 != 0:
        raise AssertionError(
            f"expected an even number of '{marker}' markers, found {len(indexes)}"
        )

    # One span per pair of consecutive markers: [marker_i, marker_i+1)
    spans = [contents[start:end] for start, end in zip(indexes, indexes[1:])]

    def span_type(position, span):
        # Odd positions begin at a closing marker and only hold filler text.
        if position % 2:
            return "other"
        header = span.split("\n", 1)[0]
        if "setup" in header:
            return "setup"
        if "loop" in header:
            return "loop"
        if "main" in header:
            return "command"
        return "other"

    pieces = []
    for position, span in enumerate(spans):
        kind = span_type(position, span)
        if kind == "setup":
            # Quoted and checked so paths with spaces work and a failed cd
            # does not source/run from the wrong directory.
            pieces.append(
                'cd "$(cd "$(dirname "$0")" && pwd)" || exit 1; source gpu_utility.sh\n\n'
            )
            pieces.append(span)
        elif kind == "command":
            pieces.append(update_device_command)
            # Drop the "##### main" header line; keep the newline that follows it.
            newline_at = span.find("\n")
            body = span[newline_at:] if newline_at != -1 else ""
            if debug:
                pieces.append(f"{body}\n")
            elif body.strip():
                # A brace group avoids re-parsing the text through a quoted
                # variable and eval, which broke on double quotes and merged
                # multi-line commands into one.
                if not body.endswith("\n"):
                    body += "\n"
                pieces.append("\n\n{" + body + "} &\n\n\n")
            pieces.append("sleep $sleeptime\n\n")
        else:
            pieces.append(span)

    return "".join(pieces)
|
res/20230615-17h44m58s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Configuration and helper sourced by script.sh (bash: uses arrays and [[ ]]).
# All memory figures are MiB, utilisation figures are percentages.
username=yila22
max_gpu_utilization=90
total_gpu_memory=24564
max_gpu_memory_gap=5000
available_devices=( 0 1 2 3 4 )
current_device_idx=-1
sleeptime=10
cpu_mean_max=77
memory_rate_max=80
constrain_total=true
constrain_mine=true
# divisor applied to the thresholds for the per-user check; must be a positive
# integer (it was the non-numeric value "True", which bc cannot evaluate)
constrain_rate=2


#######################################
# Wait until CPU / memory limits allow another job, then select the next GPU.
# Globals read:
#   constrain_total, constrain_mine, constrain_rate, cpu_mean_max,
#   memory_rate_max, username_mine (falls back to username),
#   available_devices, total_gpu_memory, max_gpu_memory_gap,
#   max_gpu_utilization
# Globals written:
#   current_device_idx, device (the scratch variables assigned below are not
#   local either)
# Outputs:
#   progress messages on stdout
#######################################
function update_device_idx {
    # accept both shell-style "true" and Python-style "True"
    if [[ "$constrain_total" == [Tt]rue ]]; then
        # check total cpu usage
        while true; do
            cpu_mean_1=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
            sleep 1
            cpu_mean_2=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
            sleep 1
            cpu_mean_3=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
            cpu_mean=$(echo "scale=2; ($cpu_mean_1+$cpu_mean_2+$cpu_mean_3)/3" | bc)

            # if currently cpu usage is less than the threshold, then break
            if [ "$(echo "$cpu_mean < $cpu_mean_max" | bc)" -eq 1 ]; then
                echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
                break
            else
                echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
                sleep 10
            fi
        done;

        # check total memory usage
        while true; do
            # get memory usage of whole system (average of three samples)
            mem_used_1=$(free -m | awk '/Mem:/ {print $3}')
            sleep 1
            mem_used_2=$(free -m | awk '/Mem:/ {print $3}')
            sleep 1
            mem_used_3=$(free -m | awk '/Mem:/ {print $3}')
            mem_used=$(echo "scale=2; ($mem_used_1+$mem_used_2+$mem_used_3)/3" | bc)

            # get rate of memory usage; multiply first so scale=2 keeps the decimals
            mem_rate=$(echo "scale=2; $mem_used*100/$(free -m | awk '/Mem:/ {print $2}')" | bc)
            if [ "$(echo "$mem_rate < $memory_rate_max" | bc)" -eq 1 ]; then
                echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
                break
            else
                echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
                sleep 10
            fi
        done;
    fi;

    # if constrain_mine
    if [[ "$constrain_mine" == [Tt]rue ]]; then

        # constrain_rate is used as a divisor; fall back to 2 when it is not a positive integer
        case "$constrain_rate" in
            ''|*[!0-9]*|0)
                echo "constrain_rate '$constrain_rate' is not a positive integer, using 2" >&2
                rate=2 ;;
            *)
                rate=$constrain_rate ;;
        esac

        # check my own cpu and memory usage, it should be less than 1/$constrain_rate of the given cpu_mean_max / memory_rate_max
        while true; do
            # prefer username_mine, then the configured username, then the current user
            username=${username_mine:-${username:-$(id -un)}}
            cpu_usage_user_sum=$(ps -u "$username" -o %cpu | awk '{sum+=$1} END {print sum}')
            # every core contributes 100 (%) to the total budget
            total_aviable_cpu=$(nproc)
            total_aviable_cpu=$(echo "$total_aviable_cpu*100" | bc)
            cpu_usage_user_ratio=$(echo "scale=2; $cpu_usage_user_sum*100/$total_aviable_cpu" | bc)

            # rss is reported in KiB; convert to MiB to match free -m
            memory_usage_user_sum=$(ps -u "$username" -o rss | awk '{sum+=$1} END {print sum/1024}')
            memory_usage_total=$(free -m | awk '/Mem:/ {print $2}')
            memory_usage_user_ratio=$(echo "scale=2; $memory_usage_user_sum*100/$memory_usage_total" | bc)

            # so my ratio should be less than 1/$constrain_rate of the given threshold
            cpu_mean_max_mine=$(echo "$cpu_mean_max/$rate" | bc)
            memory_rate_max_mine=$(echo "$memory_rate_max/$rate" | bc)
            if [ "$(echo "$cpu_usage_user_ratio < $cpu_mean_max_mine" | bc)" -eq 1 ] && [ "$(echo "$memory_usage_user_ratio < $memory_rate_max_mine" | bc)" -eq 1 ]; then
                echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
                break
            else
                echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds"
                sleep 10
            fi
        done;
    fi;

    # so all the conditions are satisfied, we can update the device idx and run the next experiment
    while true; do
        current_device_idx=$((current_device_idx+1))
        if [ "$current_device_idx" -ge "${#available_devices[@]}" ]; then
            # wrap around to the first device
            current_device_idx=0
        fi
        # check whether this device is fully booked using nvidia-smi
        useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "${available_devices[$current_device_idx]}")
        utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "${available_devices[$current_device_idx]}")

        # Booked when less than max_gpu_memory_gap would remain free, or the GPU is too busy.
        # (The threshold used the undefined name total_aviable, which evaluated to 0 and
        # made every device look booked.)
        if [ "$useage" -ge $((total_gpu_memory-max_gpu_memory_gap)) ] || [ "$utilization" -ge "$max_gpu_utilization" ]; then
            echo "device ${available_devices[$current_device_idx]} is fully booked, try next one"
            sleep 3
            continue
        else
            break
        fi
    done
    echo "current device: ${available_devices[$current_device_idx]}"
    device=${available_devices[$current_device_idx]}
}
|
res/20230615-17h44m58s/script.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Generated experiment launcher: for every loop combination it waits for a
# free GPU slot (update_device_idx from gpu_utility.sh), starts the experiment
# in the background and pauses $sleeptime seconds before the next one.

# Work from this script's directory so gpu_utility.sh resolves; stop if either
# step fails instead of launching jobs without the scheduler function.
cd "$(cd "$(dirname "$0")" && pwd)" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): update_device_idx stores its choice in $device, but nothing
# below reads it and CUDA_VISIBLE_DEVICES stays fixed at 2 - confirm how the
# selected device is meant to reach the python process.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
for dataset_idx in 0 1 2; do
#####


update_device_idx;


# Run the experiment in the background; a brace group is used instead of
# storing the text in a variable and eval-ing it, so quoting inside the
# command is preserved.
{
python single_user.py --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 --batch_size 16 --num_epochs 2 --poison_method "$poison_method" --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb
} &


sleep "$sleeptime"

#####

#####
done;done;
|
res/20230615-17h45m38s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Limits and shared state for update_device_idx (this file is meant to be sourced).
username=yila22                  # account inspected by the per-user CPU/memory check
max_gpu_utilization=90           # a GPU at or above this utilization (%) is skipped
total_gpu_memory=24564           # memory of one GPU, in the unit nvidia-smi reports (MiB)
max_gpu_memory_gap=5000          # a GPU is skipped once its used memory reaches total - gap
available_devices=( 0 1 2 3 4 )  # GPU indices to rotate through
current_device_idx=-1            # index into available_devices of the last pick (-1: none yet)
sleeptime=10                     # not read in this file; available to the sourcing script
cpu_mean_max=77                  # host-wide CPU usage (%) has to be below this
memory_rate_max=80               # host-wide memory usage (%) has to be below this
# The flags are matched against the word "true"; the former "True" spelling
# never matched the lowercase test and silently disabled both checks.
constrain_total=true
constrain_mine=true
# Divisor turning the host-wide limits into per-user limits. It used to be
# "True", which is not a number; 2 matches the "half of the given threshold"
# wording of the log messages.
constrain_rate=2

# Succeeds when reading $1 is present and numerically below limit $2.
# A missing reading never passes, so a failed probe keeps the caller waiting.
function _gpu_utility_below {
  [[ -n "$1" && -n "$2" ]] || return 1
  awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 < b + 0) }'
}

# Succeeds when flag value $1 spells "true" in any letter case.
function _gpu_utility_enabled {
  case "$1" in
    [Tt][Rr][Uu][Ee]) return 0 ;;
    *) return 1 ;;
  esac
}

#######################################
# Wait until the host has spare capacity, then select the next usable GPU.
# Globals read:
#   constrain_total, constrain_mine, constrain_rate, cpu_mean_max,
#   memory_rate_max, username (or username_mine when set), available_devices,
#   total_gpu_memory, max_gpu_memory_gap, max_gpu_utilization
# Globals written:
#   current_device_idx - index of the selected entry in available_devices
#   device             - the selected GPU id
# Outputs:
#   progress messages on stdout
# Returns:
#   0 once a GPU was selected, 1 when available_devices is empty.
#   Blocks (polling) for as long as a limit is exceeded or every GPU is busy.
#######################################
function update_device_idx {
  local user share i cores mem_total
  local cpu_mean mem_rate cpu_ratio mem_ratio cpu_limit mem_limit
  local gpu used util
  local number_re='^[0-9]+([.][0-9]+)?$'
  local int_re='^[0-9]+$'

  if (( ${#available_devices[@]} == 0 )); then
    echo "available_devices is empty, cannot pick a device" >&2
    return 1
  fi

  if _gpu_utility_enabled "$constrain_total"; then
    # check total cpu usage: mean over three one-second mpstat samples
    while true; do
      cpu_mean=$(
        for i in 1 2 3; do
          mpstat -P ALL 1 1 \
            | awk '/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
                   END { if (count) print total / count }'
          if (( i < 3 )); then sleep 1; fi
        done | awk '{ sum += $1; n++ } END { if (n) printf "%.2f", sum / n }'
      )

      # if currently cpu usage is less than the threshold, then break
      if _gpu_utility_below "$cpu_mean" "$cpu_mean_max"; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage: mean used/total percentage over three samples
    while true; do
      mem_rate=$(
        for i in 1 2 3; do
          free -m | awk '/Mem:/ && $2 > 0 { print $3 * 100 / $2 }'
          if (( i < 3 )); then sleep 1; fi
        done | awk '{ sum += $1; n++ } END { if (n) printf "%.2f", sum / n }'
      )

      if _gpu_utility_below "$mem_rate" "$memory_rate_max"; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_utility_enabled "$constrain_mine"; then
    # Kept in a local so the global username is no longer overwritten;
    # username_mine is still honoured when a caller defines it.
    user="${username_mine:-${username:-$(id -un)}}"

    # A divisor that is not a positive number falls back to 2 ("half").
    share="$constrain_rate"
    if ! [[ "$share" =~ $number_re ]] || ! _gpu_utility_below 0 "$share"; then
      share=2
    fi

    # so my ratio should be less than 1/$share of the given threshold
    cpu_limit=$(awk -v m="$cpu_mean_max" -v s="$share" 'BEGIN { printf "%.2f", m / s }')
    mem_limit=$(awk -v m="$memory_rate_max" -v s="$share" 'BEGIN { printf "%.2f", m / s }')

    while true; do
      # Summed %cpu of the user's processes as a share of all cores.
      cores=$(nproc)
      cpu_ratio=$(ps -u "$user" -o %cpu \
        | awk -v cores="$cores" '{ sum += $1 } END { if (cores > 0) printf "%.2f", sum / cores }')

      # Summed RSS (KiB -> MiB) as a percentage of total memory in MiB.
      mem_total=$(free -m | awk '/Mem:/ { print $2 }')
      mem_ratio=$(ps -u "$user" -o rss \
        | awk -v total="$mem_total" '{ sum += $1 } END { if (total > 0) printf "%.2f", sum / 1024 * 100 / total }')

      if _gpu_utility_below "$cpu_ratio" "$cpu_limit" && _gpu_utility_below "$mem_ratio" "$mem_limit"; then
        echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is less than half of the given threshold for cpu: $cpu_limit and memory: $mem_limit, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is greater than half of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  while true; do
    current_device_idx=$(( current_device_idx + 1 ))
    if (( current_device_idx >= ${#available_devices[@]} )); then
      # reset
      current_device_idx=0
    fi
    gpu="${available_devices[current_device_idx]}"

    # check whether this device is fully booked using nvidia-smi
    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    util=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")
    used="${used//[[:space:]]/}"
    util="${util//[[:space:]]/}"

    # The memory ceiling is derived from total_gpu_memory; the undefined name
    # used before made the ceiling negative, so no GPU could ever qualify.
    # An unreadable value counts as busy.
    if [[ "$used" =~ $int_re && "$util" =~ $int_re ]] \
      && (( used < total_gpu_memory - max_gpu_memory_gap && util < max_gpu_utilization )); then
      break
    fi
    echo "device $gpu is fully booked, try next one"
    sleep 3
  done

  echo "current device: ${available_devices[current_device_idx]}"
  device="${available_devices[current_device_idx]}"
}
|
res/20230615-17h45m38s/script.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Run from the directory holding this script so the relative paths below
# (gpu_utility.sh, single_user.py) resolve; stop when either step fails
# instead of launching jobs without the throttling helper.
cd "$(dirname "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): every job is pinned to GPU 2 here; the `device` value chosen by
# update_device_idx is not passed to the command - confirm this is intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
    #####

    # Blocks until the checks in gpu_utility.sh pass.
    update_device_idx

    # An argument array replaces the former string + eval, so no value is
    # parsed by the shell a second time.
    job=(
      python single_user.py
      --dataset_idx "$dataset_idx"
      --trigger_size 1
      --target 0
      --loc 0
      --batch_size 16
      --num_epochs 2
      --poison_method "$poison_method"
      --lr 5e-5
      --pattern 0
      --exp_name "$exp_name"
      --log_wb
    )
    "${job[@]}" &

    # Give the job time to start before the next capacity check.
    sleep "${sleeptime:-10}"

    #####

    #####
  done
done
|
res/20230615-17h46m42s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Limits and shared state for update_device_idx (this file is meant to be sourced).
username=yila22                  # account inspected by the per-user CPU/memory check
max_gpu_utilization=90           # a GPU at or above this utilization (%) is skipped
total_gpu_memory=24564           # memory of one GPU, in the unit nvidia-smi reports (MiB)
max_gpu_memory_gap=5000          # a GPU is skipped once its used memory reaches total - gap
available_devices=( 0 1 2 3 4 )  # GPU indices to rotate through
current_device_idx=-1            # index into available_devices of the last pick (-1: none yet)
sleeptime=10                     # not read in this file; available to the sourcing script
cpu_mean_max=77                  # host-wide CPU usage (%) has to be below this
memory_rate_max=80               # host-wide memory usage (%) has to be below this
# The flags are matched against the word "true"; the former "True" spelling
# never matched the lowercase test and silently disabled both checks.
constrain_total=true
constrain_mine=true
# Divisor turning the host-wide limits into per-user limits. It used to be
# "True", which is not a number; 2 matches the "half of the given threshold"
# wording of the log messages.
constrain_rate=2

# Succeeds when reading $1 is present and numerically below limit $2.
# A missing reading never passes, so a failed probe keeps the caller waiting.
function _gpu_utility_below {
  [[ -n "$1" && -n "$2" ]] || return 1
  awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 < b + 0) }'
}

# Succeeds when flag value $1 spells "true" in any letter case.
function _gpu_utility_enabled {
  case "$1" in
    [Tt][Rr][Uu][Ee]) return 0 ;;
    *) return 1 ;;
  esac
}

#######################################
# Wait until the host has spare capacity, then select the next usable GPU.
# Globals read:
#   constrain_total, constrain_mine, constrain_rate, cpu_mean_max,
#   memory_rate_max, username (or username_mine when set), available_devices,
#   total_gpu_memory, max_gpu_memory_gap, max_gpu_utilization
# Globals written:
#   current_device_idx - index of the selected entry in available_devices
#   device             - the selected GPU id
# Outputs:
#   progress messages on stdout
# Returns:
#   0 once a GPU was selected, 1 when available_devices is empty.
#   Blocks (polling) for as long as a limit is exceeded or every GPU is busy.
#######################################
function update_device_idx {
  local user share i cores mem_total
  local cpu_mean mem_rate cpu_ratio mem_ratio cpu_limit mem_limit
  local gpu used util
  local number_re='^[0-9]+([.][0-9]+)?$'
  local int_re='^[0-9]+$'

  if (( ${#available_devices[@]} == 0 )); then
    echo "available_devices is empty, cannot pick a device" >&2
    return 1
  fi

  if _gpu_utility_enabled "$constrain_total"; then
    # check total cpu usage: mean over three one-second mpstat samples
    while true; do
      cpu_mean=$(
        for i in 1 2 3; do
          mpstat -P ALL 1 1 \
            | awk '/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
                   END { if (count) print total / count }'
          if (( i < 3 )); then sleep 1; fi
        done | awk '{ sum += $1; n++ } END { if (n) printf "%.2f", sum / n }'
      )

      # if currently cpu usage is less than the threshold, then break
      if _gpu_utility_below "$cpu_mean" "$cpu_mean_max"; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage: mean used/total percentage over three samples
    while true; do
      mem_rate=$(
        for i in 1 2 3; do
          free -m | awk '/Mem:/ && $2 > 0 { print $3 * 100 / $2 }'
          if (( i < 3 )); then sleep 1; fi
        done | awk '{ sum += $1; n++ } END { if (n) printf "%.2f", sum / n }'
      )

      if _gpu_utility_below "$mem_rate" "$memory_rate_max"; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_utility_enabled "$constrain_mine"; then
    # Kept in a local so the global username is no longer overwritten;
    # username_mine is still honoured when a caller defines it.
    user="${username_mine:-${username:-$(id -un)}}"

    # A divisor that is not a positive number falls back to 2 ("half").
    share="$constrain_rate"
    if ! [[ "$share" =~ $number_re ]] || ! _gpu_utility_below 0 "$share"; then
      share=2
    fi

    # so my ratio should be less than 1/$share of the given threshold
    cpu_limit=$(awk -v m="$cpu_mean_max" -v s="$share" 'BEGIN { printf "%.2f", m / s }')
    mem_limit=$(awk -v m="$memory_rate_max" -v s="$share" 'BEGIN { printf "%.2f", m / s }')

    while true; do
      # Summed %cpu of the user's processes as a share of all cores.
      cores=$(nproc)
      cpu_ratio=$(ps -u "$user" -o %cpu \
        | awk -v cores="$cores" '{ sum += $1 } END { if (cores > 0) printf "%.2f", sum / cores }')

      # Summed RSS (KiB -> MiB) as a percentage of total memory in MiB.
      mem_total=$(free -m | awk '/Mem:/ { print $2 }')
      mem_ratio=$(ps -u "$user" -o rss \
        | awk -v total="$mem_total" '{ sum += $1 } END { if (total > 0) printf "%.2f", sum / 1024 * 100 / total }')

      if _gpu_utility_below "$cpu_ratio" "$cpu_limit" && _gpu_utility_below "$mem_ratio" "$mem_limit"; then
        echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is less than half of the given threshold for cpu: $cpu_limit and memory: $mem_limit, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is greater than half of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  while true; do
    current_device_idx=$(( current_device_idx + 1 ))
    if (( current_device_idx >= ${#available_devices[@]} )); then
      # reset
      current_device_idx=0
    fi
    gpu="${available_devices[current_device_idx]}"

    # check whether this device is fully booked using nvidia-smi
    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    util=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")
    used="${used//[[:space:]]/}"
    util="${util//[[:space:]]/}"

    # The memory ceiling is derived from total_gpu_memory; the undefined name
    # used before made the ceiling negative, so no GPU could ever qualify.
    # An unreadable value counts as busy.
    if [[ "$used" =~ $int_re && "$util" =~ $int_re ]] \
      && (( used < total_gpu_memory - max_gpu_memory_gap && util < max_gpu_utilization )); then
      break
    fi
    echo "device $gpu is fully booked, try next one"
    sleep 3
  done

  echo "current device: ${available_devices[current_device_idx]}"
  device="${available_devices[current_device_idx]}"
}
|
res/20230615-17h46m42s/script.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Run from the directory holding this script so the relative paths below
# (gpu_utility.sh, single_user.py) resolve; stop when either step fails
# instead of launching jobs without the throttling helper.
cd "$(dirname "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): every job is pinned to GPU 2 here; the `device` value chosen by
# update_device_idx is not passed to the command - confirm this is intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
    #####

    # Blocks until the checks in gpu_utility.sh pass.
    update_device_idx

    # An argument array replaces the former string + eval, so no value is
    # parsed by the shell a second time.
    job=(
      python single_user.py
      --dataset_idx "$dataset_idx"
      --trigger_size 1
      --target 0
      --loc 0
      --batch_size 16
      --num_epochs 2
      --poison_method "$poison_method"
      --lr 5e-5
      --pattern 0
      --exp_name "$exp_name"
      --log_wb
    )
    "${job[@]}" &

    # Give the job time to start before the next capacity check.
    sleep "${sleeptime:-10}"

    #####

    #####
  done
done
|
res/20230615-17h48m58s.zip
ADDED
|
Binary file (1.9 kB). View file
|
|
|
res/20230615-17h48m58s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Limits and shared state for update_device_idx (this file is meant to be sourced).
username=yila22                  # account inspected by the per-user CPU/memory check
max_gpu_utilization=90           # a GPU at or above this utilization (%) is skipped
total_gpu_memory=24564           # memory of one GPU, in the unit nvidia-smi reports (MiB)
max_gpu_memory_gap=5000          # a GPU is skipped once its used memory reaches total - gap
available_devices=( 0 1 2 3 4 )  # GPU indices to rotate through
current_device_idx=-1            # index into available_devices of the last pick (-1: none yet)
sleeptime=10                     # not read in this file; available to the sourcing script
cpu_mean_max=77                  # host-wide CPU usage (%) has to be below this
memory_rate_max=80               # host-wide memory usage (%) has to be below this
# The flags are matched against the word "true"; the former "True" spelling
# never matched the lowercase test and silently disabled both checks.
constrain_total=true
constrain_mine=true
# Divisor turning the host-wide limits into per-user limits. It used to be
# "True", which is not a number; 2 matches the "half of the given threshold"
# wording of the log messages.
constrain_rate=2

# Succeeds when reading $1 is present and numerically below limit $2.
# A missing reading never passes, so a failed probe keeps the caller waiting.
function _gpu_utility_below {
  [[ -n "$1" && -n "$2" ]] || return 1
  awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 < b + 0) }'
}

# Succeeds when flag value $1 spells "true" in any letter case.
function _gpu_utility_enabled {
  case "$1" in
    [Tt][Rr][Uu][Ee]) return 0 ;;
    *) return 1 ;;
  esac
}

#######################################
# Wait until the host has spare capacity, then select the next usable GPU.
# Globals read:
#   constrain_total, constrain_mine, constrain_rate, cpu_mean_max,
#   memory_rate_max, username (or username_mine when set), available_devices,
#   total_gpu_memory, max_gpu_memory_gap, max_gpu_utilization
# Globals written:
#   current_device_idx - index of the selected entry in available_devices
#   device             - the selected GPU id
# Outputs:
#   progress messages on stdout
# Returns:
#   0 once a GPU was selected, 1 when available_devices is empty.
#   Blocks (polling) for as long as a limit is exceeded or every GPU is busy.
#######################################
function update_device_idx {
  local user share i cores mem_total
  local cpu_mean mem_rate cpu_ratio mem_ratio cpu_limit mem_limit
  local gpu used util
  local number_re='^[0-9]+([.][0-9]+)?$'
  local int_re='^[0-9]+$'

  if (( ${#available_devices[@]} == 0 )); then
    echo "available_devices is empty, cannot pick a device" >&2
    return 1
  fi

  if _gpu_utility_enabled "$constrain_total"; then
    # check total cpu usage: mean over three one-second mpstat samples
    while true; do
      cpu_mean=$(
        for i in 1 2 3; do
          mpstat -P ALL 1 1 \
            | awk '/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
                   END { if (count) print total / count }'
          if (( i < 3 )); then sleep 1; fi
        done | awk '{ sum += $1; n++ } END { if (n) printf "%.2f", sum / n }'
      )

      # if currently cpu usage is less than the threshold, then break
      if _gpu_utility_below "$cpu_mean" "$cpu_mean_max"; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage: mean used/total percentage over three samples
    while true; do
      mem_rate=$(
        for i in 1 2 3; do
          free -m | awk '/Mem:/ && $2 > 0 { print $3 * 100 / $2 }'
          if (( i < 3 )); then sleep 1; fi
        done | awk '{ sum += $1; n++ } END { if (n) printf "%.2f", sum / n }'
      )

      if _gpu_utility_below "$mem_rate" "$memory_rate_max"; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_utility_enabled "$constrain_mine"; then
    # Kept in a local so the global username is no longer overwritten;
    # username_mine is still honoured when a caller defines it.
    user="${username_mine:-${username:-$(id -un)}}"

    # A divisor that is not a positive number falls back to 2 ("half").
    share="$constrain_rate"
    if ! [[ "$share" =~ $number_re ]] || ! _gpu_utility_below 0 "$share"; then
      share=2
    fi

    # so my ratio should be less than 1/$share of the given threshold
    cpu_limit=$(awk -v m="$cpu_mean_max" -v s="$share" 'BEGIN { printf "%.2f", m / s }')
    mem_limit=$(awk -v m="$memory_rate_max" -v s="$share" 'BEGIN { printf "%.2f", m / s }')

    while true; do
      # Summed %cpu of the user's processes as a share of all cores.
      cores=$(nproc)
      cpu_ratio=$(ps -u "$user" -o %cpu \
        | awk -v cores="$cores" '{ sum += $1 } END { if (cores > 0) printf "%.2f", sum / cores }')

      # Summed RSS (KiB -> MiB) as a percentage of total memory in MiB.
      mem_total=$(free -m | awk '/Mem:/ { print $2 }')
      mem_ratio=$(ps -u "$user" -o rss \
        | awk -v total="$mem_total" '{ sum += $1 } END { if (total > 0) printf "%.2f", sum / 1024 * 100 / total }')

      if _gpu_utility_below "$cpu_ratio" "$cpu_limit" && _gpu_utility_below "$mem_ratio" "$mem_limit"; then
        echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is less than half of the given threshold for cpu: $cpu_limit and memory: $mem_limit, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is greater than half of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  while true; do
    current_device_idx=$(( current_device_idx + 1 ))
    if (( current_device_idx >= ${#available_devices[@]} )); then
      # reset
      current_device_idx=0
    fi
    gpu="${available_devices[current_device_idx]}"

    # check whether this device is fully booked using nvidia-smi
    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    util=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")
    used="${used//[[:space:]]/}"
    util="${util//[[:space:]]/}"

    # The memory ceiling is derived from total_gpu_memory; the undefined name
    # used before made the ceiling negative, so no GPU could ever qualify.
    # An unreadable value counts as busy.
    if [[ "$used" =~ $int_re && "$util" =~ $int_re ]] \
      && (( used < total_gpu_memory - max_gpu_memory_gap && util < max_gpu_utilization )); then
      break
    fi
    echo "device $gpu is fully booked, try next one"
    sleep 3
  done

  echo "current device: ${available_devices[current_device_idx]}"
  device="${available_devices[current_device_idx]}"
}
|
res/20230615-17h48m58s/script.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Run from the directory holding this script so the relative paths below
# (gpu_utility.sh, single_user.py) resolve; stop when either step fails
# instead of launching jobs without the throttling helper.
cd "$(dirname "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): every job is pinned to GPU 2 here; the `device` value chosen by
# update_device_idx is not passed to the command - confirm this is intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
    #####

    # Blocks until the checks in gpu_utility.sh pass.
    update_device_idx

    # An argument array replaces the former string + eval, so no value is
    # parsed by the shell a second time.
    job=(
      python single_user.py
      --dataset_idx "$dataset_idx"
      --trigger_size 1
      --target 0
      --loc 0
      --batch_size 16
      --num_epochs 2
      --poison_method "$poison_method"
      --lr 5e-5
      --pattern 0
      --exp_name "$exp_name"
      --log_wb
    )
    "${job[@]}" &

    # Give the job time to start before the next capacity check.
    sleep "${sleeptime:-10}"

    #####

    #####
  done
done
|
res/20230615-17h49m08s.zip
ADDED
|
Binary file (1.9 kB). View file
|
|
|
res/20230615-17h49m08s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Limits and shared state for update_device_idx (this file is meant to be sourced).
username=yila22                  # account inspected by the per-user CPU/memory check
max_gpu_utilization=90           # a GPU at or above this utilization (%) is skipped
total_gpu_memory=24564           # memory of one GPU, in the unit nvidia-smi reports (MiB)
max_gpu_memory_gap=5000          # a GPU is skipped once its used memory reaches total - gap
available_devices=( 0 1 2 3 4 )  # GPU indices to rotate through
current_device_idx=-1            # index into available_devices of the last pick (-1: none yet)
sleeptime=10                     # not read in this file; available to the sourcing script
cpu_mean_max=77                  # host-wide CPU usage (%) has to be below this
memory_rate_max=80               # host-wide memory usage (%) has to be below this
# The flags are matched against the word "true"; the former "True" spelling
# never matched the lowercase test and silently disabled both checks.
constrain_total=true
constrain_mine=true
# Divisor turning the host-wide limits into per-user limits. It used to be
# "True", which is not a number; 2 matches the "half of the given threshold"
# wording of the log messages.
constrain_rate=2

# Succeeds when reading $1 is present and numerically below limit $2.
# A missing reading never passes, so a failed probe keeps the caller waiting.
function _gpu_utility_below {
  [[ -n "$1" && -n "$2" ]] || return 1
  awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 < b + 0) }'
}

# Succeeds when flag value $1 spells "true" in any letter case.
function _gpu_utility_enabled {
  case "$1" in
    [Tt][Rr][Uu][Ee]) return 0 ;;
    *) return 1 ;;
  esac
}

#######################################
# Wait until the host has spare capacity, then select the next usable GPU.
# Globals read:
#   constrain_total, constrain_mine, constrain_rate, cpu_mean_max,
#   memory_rate_max, username (or username_mine when set), available_devices,
#   total_gpu_memory, max_gpu_memory_gap, max_gpu_utilization
# Globals written:
#   current_device_idx - index of the selected entry in available_devices
#   device             - the selected GPU id
# Outputs:
#   progress messages on stdout
# Returns:
#   0 once a GPU was selected, 1 when available_devices is empty.
#   Blocks (polling) for as long as a limit is exceeded or every GPU is busy.
#######################################
function update_device_idx {
  local user share i cores mem_total
  local cpu_mean mem_rate cpu_ratio mem_ratio cpu_limit mem_limit
  local gpu used util
  local number_re='^[0-9]+([.][0-9]+)?$'
  local int_re='^[0-9]+$'

  if (( ${#available_devices[@]} == 0 )); then
    echo "available_devices is empty, cannot pick a device" >&2
    return 1
  fi

  if _gpu_utility_enabled "$constrain_total"; then
    # check total cpu usage: mean over three one-second mpstat samples
    while true; do
      cpu_mean=$(
        for i in 1 2 3; do
          mpstat -P ALL 1 1 \
            | awk '/Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
                   END { if (count) print total / count }'
          if (( i < 3 )); then sleep 1; fi
        done | awk '{ sum += $1; n++ } END { if (n) printf "%.2f", sum / n }'
      )

      # if currently cpu usage is less than the threshold, then break
      if _gpu_utility_below "$cpu_mean" "$cpu_mean_max"; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage: mean used/total percentage over three samples
    while true; do
      mem_rate=$(
        for i in 1 2 3; do
          free -m | awk '/Mem:/ && $2 > 0 { print $3 * 100 / $2 }'
          if (( i < 3 )); then sleep 1; fi
        done | awk '{ sum += $1; n++ } END { if (n) printf "%.2f", sum / n }'
      )

      if _gpu_utility_below "$mem_rate" "$memory_rate_max"; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_utility_enabled "$constrain_mine"; then
    # Kept in a local so the global username is no longer overwritten;
    # username_mine is still honoured when a caller defines it.
    user="${username_mine:-${username:-$(id -un)}}"

    # A divisor that is not a positive number falls back to 2 ("half").
    share="$constrain_rate"
    if ! [[ "$share" =~ $number_re ]] || ! _gpu_utility_below 0 "$share"; then
      share=2
    fi

    # so my ratio should be less than 1/$share of the given threshold
    cpu_limit=$(awk -v m="$cpu_mean_max" -v s="$share" 'BEGIN { printf "%.2f", m / s }')
    mem_limit=$(awk -v m="$memory_rate_max" -v s="$share" 'BEGIN { printf "%.2f", m / s }')

    while true; do
      # Summed %cpu of the user's processes as a share of all cores.
      cores=$(nproc)
      cpu_ratio=$(ps -u "$user" -o %cpu \
        | awk -v cores="$cores" '{ sum += $1 } END { if (cores > 0) printf "%.2f", sum / cores }')

      # Summed RSS (KiB -> MiB) as a percentage of total memory in MiB.
      mem_total=$(free -m | awk '/Mem:/ { print $2 }')
      mem_ratio=$(ps -u "$user" -o rss \
        | awk -v total="$mem_total" '{ sum += $1 } END { if (total > 0) printf "%.2f", sum / 1024 * 100 / total }')

      if _gpu_utility_below "$cpu_ratio" "$cpu_limit" && _gpu_utility_below "$mem_ratio" "$mem_limit"; then
        echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is less than half of the given threshold for cpu: $cpu_limit and memory: $mem_limit, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is greater than half of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  while true; do
    current_device_idx=$(( current_device_idx + 1 ))
    if (( current_device_idx >= ${#available_devices[@]} )); then
      # reset
      current_device_idx=0
    fi
    gpu="${available_devices[current_device_idx]}"

    # check whether this device is fully booked using nvidia-smi
    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    util=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")
    used="${used//[[:space:]]/}"
    util="${util//[[:space:]]/}"

    # The memory ceiling is derived from total_gpu_memory; the undefined name
    # used before made the ceiling negative, so no GPU could ever qualify.
    # An unreadable value counts as busy.
    if [[ "$used" =~ $int_re && "$util" =~ $int_re ]] \
      && (( used < total_gpu_memory - max_gpu_memory_gap && util < max_gpu_utilization )); then
      break
    fi
    echo "device $gpu is fully booked, try next one"
    sleep 3
  done

  echo "current device: ${available_devices[current_device_idx]}"
  device="${available_devices[current_device_idx]}"
}
|
res/20230615-17h49m08s/script.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Run from the directory holding this script so the relative paths below
# (gpu_utility.sh, single_user.py) resolve; stop when either step fails
# instead of launching jobs without the throttling helper.
cd "$(dirname "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): every job is pinned to GPU 2 here; the `device` value chosen by
# update_device_idx is not passed to the command - confirm this is intended.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
    #####

    # Blocks until the checks in gpu_utility.sh pass.
    update_device_idx

    # An argument array replaces the former string + eval, so no value is
    # parsed by the shell a second time.
    job=(
      python single_user.py
      --dataset_idx "$dataset_idx"
      --trigger_size 1
      --target 0
      --loc 0
      --batch_size 16
      --num_epochs 2
      --poison_method "$poison_method"
      --lr 5e-5
      --pattern 0
      --exp_name "$exp_name"
      --log_wb
    )
    "${job[@]}" &

    # Give the job time to start before the next capacity check.
    sleep "${sleeptime:-10}"

    #####

    #####
  done
done
|
res/20230615-17h49m45s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
##### settings read by update_device_idx
username=yila22            # account inspected when username_mine is not set
max_gpu_utilization=90     # percent; a GPU at or above this is skipped
total_gpu_memory=24564     # per-GPU capacity, same unit as nvidia-smi memory.used
max_gpu_memory_gap=5000    # a GPU is skipped once used >= capacity - gap
available_devices=( 0 1 2 3 4 )
current_device_idx=-1      # index into available_devices; -1 = nothing picked yet
sleeptime=10               # not read in this file
cpu_mean_max=77            # percent
memory_rate_max=80         # percent
constrain_total=True
constrain_mine=True
constrain_rate=True        # divisor for the per-user limits (see below)

#######################################
# Block until the host has headroom, then pick the next usable GPU.
#
# Gates, applied in order (each one polls until it passes):
#   1. constrain_total: machine-wide CPU usage (mpstat) and memory usage
#      (free) must be below cpu_mean_max / memory_rate_max percent.
#   2. constrain_mine: CPU and resident memory summed over the user's own
#      processes must be below those limits divided by constrain_rate.
#   3. Round-robin over available_devices until one has used memory below
#      total_gpu_memory - max_gpu_memory_gap and utilization below
#      max_gpu_utilization.
#
# Globals read:    constrain_total, constrain_mine, constrain_rate,
#                  cpu_mean_max, memory_rate_max, username_mine, username,
#                  available_devices, total_gpu_memory, max_gpu_memory_gap,
#                  max_gpu_utilization
# Globals written: current_device_idx, device
# Outputs:         progress messages on stdout
# Returns:         1 if available_devices is empty, otherwise 0
#######################################
function update_device_idx {
  local i cpu_mean mem_rate
  local user rate mem_total
  local cpu_usage_user_ratio memory_usage_user_ratio
  local cpu_mean_max_mine memory_rate_max_mine
  local gpu useage utilization

  # The settings spell the flags "True"; accept "True" as well as "true" so
  # the gates are not silently skipped.
  if [[ "$constrain_total" == [Tt]rue ]]; then
    # check total cpu usage
    while true; do
      # Mean busy percentage (100 - last mpstat column) over the per-CPU
      # "Average:" rows of three one-second samples.
      cpu_mean=$(
        for i in 1 2 3; do
          mpstat -P ALL 1 1
          if (( i < 3 )); then sleep 1; fi
        done | awk '/Average:/ && $2 ~ /[0-9]/ { busy += 100 - $NF; rows++ }
                    END { if (rows) printf "%.2f\n", busy / rows }'
      )

      # An empty reading (mpstat produced no rows) counts as "not ready".
      if [[ -n "$cpu_mean" ]] &&
        awk -v a="$cpu_mean" -v b="$cpu_mean_max" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      else
        echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
        sleep 10
      fi
    done

    # check total memory usage
    while true; do
      # used / total from the "Mem:" row of free, over three samples.
      # Multiply before dividing so no precision is lost.
      mem_rate=$(
        for i in 1 2 3; do
          free -m
          if (( i < 3 )); then sleep 1; fi
        done | awk '/Mem:/ { used += $3; total += $2 }
                    END { if (total) printf "%.2f\n", used * 100 / total }'
      )

      if [[ -n "$mem_rate" ]] &&
        awk -v a="$mem_rate" -v b="$memory_rate_max" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      else
        echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
        sleep 10
      fi
    done
  fi

  # if constrain_mine
  if [[ "$constrain_mine" == [Tt]rue ]]; then
    # Kept in a local so the global username is never overwritten.
    user=${username_mine:-${username:-$(id -un)}}

    # constrain_rate must be a positive number to be usable as a divisor.
    # NOTE(review): anything else (e.g. "True") falls back to 2, matching the
    # "half of the given threshold" wording of the messages -- confirm.
    rate=$constrain_rate
    if ! awk -v r="$rate" 'BEGIN { exit !(r ~ /^[0-9]+([.][0-9]+)?$/ && r + 0 > 0) }'; then
      rate=2
    fi
    cpu_mean_max_mine=$(awk -v m="$cpu_mean_max" -v r="$rate" 'BEGIN { printf "%.2f\n", m / r }')
    memory_rate_max_mine=$(awk -v m="$memory_rate_max" -v r="$rate" 'BEGIN { printf "%.2f\n", m / r }')

    while true; do
      # Summed %cpu of the user's processes divided by the core count gives
      # the share of the whole machine in percent.
      cpu_usage_user_ratio=$(ps -u "$user" -o %cpu= |
        awk -v cores="$(nproc)" '{ sum += $1 }
             END { if (cores > 0) printf "%.2f\n", sum / cores }')

      # rss is summed and divided by 1024 to compare against free -m's total.
      mem_total=$(free -m | awk '/Mem:/ { print $2 }')
      memory_usage_user_ratio=$(ps -u "$user" -o rss= |
        awk -v total="$mem_total" '{ rss += $1 }
             END { if (total > 0) printf "%.2f\n", rss / 1024 * 100 / total }')

      if [[ -n "$cpu_usage_user_ratio" && -n "$memory_usage_user_ratio" ]] &&
        awk -v a="$cpu_usage_user_ratio" -v b="$cpu_mean_max_mine" 'BEGIN { exit !(a + 0 < b + 0) }' &&
        awk -v a="$memory_usage_user_ratio" -v b="$memory_rate_max_mine" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
        break
      else
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds"
        sleep 10
      fi
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  if (( ${#available_devices[@]} == 0 )); then
    echo "available_devices is empty, cannot select a device" >&2
    return 1
  fi

  while true; do
    current_device_idx=$((current_device_idx + 1))
    if (( current_device_idx >= ${#available_devices[@]} )); then
      # reset
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    # check whether this device is fully booked using nvidia-smi
    useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")

    # The capacity comes from total_gpu_memory; reading an unset name here
    # would make the threshold negative and reject every device forever.
    if [ "$useage" -ge "$((total_gpu_memory - max_gpu_memory_gap))" ] ||
      [ "$utilization" -ge "$max_gpu_utilization" ]; then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    else
      break
    fi
  done

  echo "current device: ${available_devices[$current_device_idx]}"
  device=${available_devices[$current_device_idx]}
}
|
res/20230615-17h49m45s/script.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Launch the single_user.py sweep: one background run per
# (poison_method, dataset_idx) pair, each gated by update_device_idx.

# Work from the directory holding this script so gpu_utility.sh and
# single_user.py resolve; stop instead of launching from the wrong place.
cd "$(dirname "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): update_device_idx leaves its pick in $device, but every run
# is still pinned to GPU 2 here -- confirm whether $device should be used.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
    # wait for CPU / memory / GPU headroom before starting the next run
    update_device_idx

    # argv is kept in an array so each argument stays one word without eval
    cmd=(
      python single_user.py
      --dataset_idx "$dataset_idx"
      --trigger_size 1
      --target 0
      --loc 0
      --batch_size 16
      --num_epochs 2
      --poison_method "$poison_method"
      --lr 5e-5
      --pattern 0
      --exp_name "$exp_name"
      --log_wb
    )
    "${cmd[@]}" &

    # stagger launches; sleeptime is set in gpu_utility.sh
    sleep "$sleeptime"
  done
done
|
res/20230615-17h50m13s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
##### settings read by update_device_idx
username=yila22            # account inspected when username_mine is not set
max_gpu_utilization=90     # percent; a GPU at or above this is skipped
total_gpu_memory=24564     # per-GPU capacity, same unit as nvidia-smi memory.used
max_gpu_memory_gap=5000    # a GPU is skipped once used >= capacity - gap
available_devices=( 0 1 2 3 4 )
current_device_idx=-1      # index into available_devices; -1 = nothing picked yet
sleeptime=10               # not read in this file
cpu_mean_max=77            # percent
memory_rate_max=80         # percent
constrain_total=True
constrain_mine=True
constrain_rate=True        # divisor for the per-user limits (see below)

#######################################
# Block until the host has headroom, then pick the next usable GPU.
#
# Gates, applied in order (each one polls until it passes):
#   1. constrain_total: machine-wide CPU usage (mpstat) and memory usage
#      (free) must be below cpu_mean_max / memory_rate_max percent.
#   2. constrain_mine: CPU and resident memory summed over the user's own
#      processes must be below those limits divided by constrain_rate.
#   3. Round-robin over available_devices until one has used memory below
#      total_gpu_memory - max_gpu_memory_gap and utilization below
#      max_gpu_utilization.
#
# Globals read:    constrain_total, constrain_mine, constrain_rate,
#                  cpu_mean_max, memory_rate_max, username_mine, username,
#                  available_devices, total_gpu_memory, max_gpu_memory_gap,
#                  max_gpu_utilization
# Globals written: current_device_idx, device
# Outputs:         progress messages on stdout
# Returns:         1 if available_devices is empty, otherwise 0
#######################################
function update_device_idx {
  local i cpu_mean mem_rate
  local user rate mem_total
  local cpu_usage_user_ratio memory_usage_user_ratio
  local cpu_mean_max_mine memory_rate_max_mine
  local gpu useage utilization

  # The settings spell the flags "True"; accept "True" as well as "true" so
  # the gates are not silently skipped.
  if [[ "$constrain_total" == [Tt]rue ]]; then
    # check total cpu usage
    while true; do
      # Mean busy percentage (100 - last mpstat column) over the per-CPU
      # "Average:" rows of three one-second samples.
      cpu_mean=$(
        for i in 1 2 3; do
          mpstat -P ALL 1 1
          if (( i < 3 )); then sleep 1; fi
        done | awk '/Average:/ && $2 ~ /[0-9]/ { busy += 100 - $NF; rows++ }
                    END { if (rows) printf "%.2f\n", busy / rows }'
      )

      # An empty reading (mpstat produced no rows) counts as "not ready".
      if [[ -n "$cpu_mean" ]] &&
        awk -v a="$cpu_mean" -v b="$cpu_mean_max" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      else
        echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
        sleep 10
      fi
    done

    # check total memory usage
    while true; do
      # used / total from the "Mem:" row of free, over three samples.
      # Multiply before dividing so no precision is lost.
      mem_rate=$(
        for i in 1 2 3; do
          free -m
          if (( i < 3 )); then sleep 1; fi
        done | awk '/Mem:/ { used += $3; total += $2 }
                    END { if (total) printf "%.2f\n", used * 100 / total }'
      )

      if [[ -n "$mem_rate" ]] &&
        awk -v a="$mem_rate" -v b="$memory_rate_max" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      else
        echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
        sleep 10
      fi
    done
  fi

  # if constrain_mine
  if [[ "$constrain_mine" == [Tt]rue ]]; then
    # Kept in a local so the global username is never overwritten.
    user=${username_mine:-${username:-$(id -un)}}

    # constrain_rate must be a positive number to be usable as a divisor.
    # NOTE(review): anything else (e.g. "True") falls back to 2, matching the
    # "half of the given threshold" wording of the messages -- confirm.
    rate=$constrain_rate
    if ! awk -v r="$rate" 'BEGIN { exit !(r ~ /^[0-9]+([.][0-9]+)?$/ && r + 0 > 0) }'; then
      rate=2
    fi
    cpu_mean_max_mine=$(awk -v m="$cpu_mean_max" -v r="$rate" 'BEGIN { printf "%.2f\n", m / r }')
    memory_rate_max_mine=$(awk -v m="$memory_rate_max" -v r="$rate" 'BEGIN { printf "%.2f\n", m / r }')

    while true; do
      # Summed %cpu of the user's processes divided by the core count gives
      # the share of the whole machine in percent.
      cpu_usage_user_ratio=$(ps -u "$user" -o %cpu= |
        awk -v cores="$(nproc)" '{ sum += $1 }
             END { if (cores > 0) printf "%.2f\n", sum / cores }')

      # rss is summed and divided by 1024 to compare against free -m's total.
      mem_total=$(free -m | awk '/Mem:/ { print $2 }')
      memory_usage_user_ratio=$(ps -u "$user" -o rss= |
        awk -v total="$mem_total" '{ rss += $1 }
             END { if (total > 0) printf "%.2f\n", rss / 1024 * 100 / total }')

      if [[ -n "$cpu_usage_user_ratio" && -n "$memory_usage_user_ratio" ]] &&
        awk -v a="$cpu_usage_user_ratio" -v b="$cpu_mean_max_mine" 'BEGIN { exit !(a + 0 < b + 0) }' &&
        awk -v a="$memory_usage_user_ratio" -v b="$memory_rate_max_mine" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
        break
      else
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds"
        sleep 10
      fi
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  if (( ${#available_devices[@]} == 0 )); then
    echo "available_devices is empty, cannot select a device" >&2
    return 1
  fi

  while true; do
    current_device_idx=$((current_device_idx + 1))
    if (( current_device_idx >= ${#available_devices[@]} )); then
      # reset
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    # check whether this device is fully booked using nvidia-smi
    useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")

    # The capacity comes from total_gpu_memory; reading an unset name here
    # would make the threshold negative and reject every device forever.
    if [ "$useage" -ge "$((total_gpu_memory - max_gpu_memory_gap))" ] ||
      [ "$utilization" -ge "$max_gpu_utilization" ]; then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    else
      break
    fi
  done

  echo "current device: ${available_devices[$current_device_idx]}"
  device=${available_devices[$current_device_idx]}
}
|
res/20230615-17h50m13s/script.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Launch the single_user.py sweep: one background run per
# (poison_method, dataset_idx) pair, each gated by update_device_idx.

# Work from the directory holding this script so gpu_utility.sh and
# single_user.py resolve; stop instead of launching from the wrong place.
cd "$(dirname "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): update_device_idx leaves its pick in $device, but every run
# is still pinned to GPU 2 here -- confirm whether $device should be used.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
    # wait for CPU / memory / GPU headroom before starting the next run
    update_device_idx

    # argv is kept in an array so each argument stays one word without eval
    cmd=(
      python single_user.py
      --dataset_idx "$dataset_idx"
      --trigger_size 1
      --target 0
      --loc 0
      --batch_size 16
      --num_epochs 2
      --poison_method "$poison_method"
      --lr 5e-5
      --pattern 0
      --exp_name "$exp_name"
      --log_wb
    )
    "${cmd[@]}" &

    # stagger launches; sleeptime is set in gpu_utility.sh
    sleep "$sleeptime"
  done
done
|
res/20230615-17h50m22s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
##### settings read by update_device_idx
username=yila22            # account inspected when username_mine is not set
max_gpu_utilization=90     # percent; a GPU at or above this is skipped
total_gpu_memory=24564     # per-GPU capacity, same unit as nvidia-smi memory.used
max_gpu_memory_gap=5000    # a GPU is skipped once used >= capacity - gap
available_devices=( 0 1 2 3 4 )
current_device_idx=-1      # index into available_devices; -1 = nothing picked yet
sleeptime=10               # not read in this file
cpu_mean_max=77            # percent
memory_rate_max=80         # percent
constrain_total=True
constrain_mine=True
constrain_rate=True        # divisor for the per-user limits (see below)

#######################################
# Block until the host has headroom, then pick the next usable GPU.
#
# Gates, applied in order (each one polls until it passes):
#   1. constrain_total: machine-wide CPU usage (mpstat) and memory usage
#      (free) must be below cpu_mean_max / memory_rate_max percent.
#   2. constrain_mine: CPU and resident memory summed over the user's own
#      processes must be below those limits divided by constrain_rate.
#   3. Round-robin over available_devices until one has used memory below
#      total_gpu_memory - max_gpu_memory_gap and utilization below
#      max_gpu_utilization.
#
# Globals read:    constrain_total, constrain_mine, constrain_rate,
#                  cpu_mean_max, memory_rate_max, username_mine, username,
#                  available_devices, total_gpu_memory, max_gpu_memory_gap,
#                  max_gpu_utilization
# Globals written: current_device_idx, device
# Outputs:         progress messages on stdout
# Returns:         1 if available_devices is empty, otherwise 0
#######################################
function update_device_idx {
  local i cpu_mean mem_rate
  local user rate mem_total
  local cpu_usage_user_ratio memory_usage_user_ratio
  local cpu_mean_max_mine memory_rate_max_mine
  local gpu useage utilization

  # The settings spell the flags "True"; accept "True" as well as "true" so
  # the gates are not silently skipped.
  if [[ "$constrain_total" == [Tt]rue ]]; then
    # check total cpu usage
    while true; do
      # Mean busy percentage (100 - last mpstat column) over the per-CPU
      # "Average:" rows of three one-second samples.
      cpu_mean=$(
        for i in 1 2 3; do
          mpstat -P ALL 1 1
          if (( i < 3 )); then sleep 1; fi
        done | awk '/Average:/ && $2 ~ /[0-9]/ { busy += 100 - $NF; rows++ }
                    END { if (rows) printf "%.2f\n", busy / rows }'
      )

      # An empty reading (mpstat produced no rows) counts as "not ready".
      if [[ -n "$cpu_mean" ]] &&
        awk -v a="$cpu_mean" -v b="$cpu_mean_max" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      else
        echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
        sleep 10
      fi
    done

    # check total memory usage
    while true; do
      # used / total from the "Mem:" row of free, over three samples.
      # Multiply before dividing so no precision is lost.
      mem_rate=$(
        for i in 1 2 3; do
          free -m
          if (( i < 3 )); then sleep 1; fi
        done | awk '/Mem:/ { used += $3; total += $2 }
                    END { if (total) printf "%.2f\n", used * 100 / total }'
      )

      if [[ -n "$mem_rate" ]] &&
        awk -v a="$mem_rate" -v b="$memory_rate_max" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      else
        echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
        sleep 10
      fi
    done
  fi

  # if constrain_mine
  if [[ "$constrain_mine" == [Tt]rue ]]; then
    # Kept in a local so the global username is never overwritten.
    user=${username_mine:-${username:-$(id -un)}}

    # constrain_rate must be a positive number to be usable as a divisor.
    # NOTE(review): anything else (e.g. "True") falls back to 2, matching the
    # "half of the given threshold" wording of the messages -- confirm.
    rate=$constrain_rate
    if ! awk -v r="$rate" 'BEGIN { exit !(r ~ /^[0-9]+([.][0-9]+)?$/ && r + 0 > 0) }'; then
      rate=2
    fi
    cpu_mean_max_mine=$(awk -v m="$cpu_mean_max" -v r="$rate" 'BEGIN { printf "%.2f\n", m / r }')
    memory_rate_max_mine=$(awk -v m="$memory_rate_max" -v r="$rate" 'BEGIN { printf "%.2f\n", m / r }')

    while true; do
      # Summed %cpu of the user's processes divided by the core count gives
      # the share of the whole machine in percent.
      cpu_usage_user_ratio=$(ps -u "$user" -o %cpu= |
        awk -v cores="$(nproc)" '{ sum += $1 }
             END { if (cores > 0) printf "%.2f\n", sum / cores }')

      # rss is summed and divided by 1024 to compare against free -m's total.
      mem_total=$(free -m | awk '/Mem:/ { print $2 }')
      memory_usage_user_ratio=$(ps -u "$user" -o rss= |
        awk -v total="$mem_total" '{ rss += $1 }
             END { if (total > 0) printf "%.2f\n", rss / 1024 * 100 / total }')

      if [[ -n "$cpu_usage_user_ratio" && -n "$memory_usage_user_ratio" ]] &&
        awk -v a="$cpu_usage_user_ratio" -v b="$cpu_mean_max_mine" 'BEGIN { exit !(a + 0 < b + 0) }' &&
        awk -v a="$memory_usage_user_ratio" -v b="$memory_rate_max_mine" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
        break
      else
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds"
        sleep 10
      fi
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  if (( ${#available_devices[@]} == 0 )); then
    echo "available_devices is empty, cannot select a device" >&2
    return 1
  fi

  while true; do
    current_device_idx=$((current_device_idx + 1))
    if (( current_device_idx >= ${#available_devices[@]} )); then
      # reset
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    # check whether this device is fully booked using nvidia-smi
    useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")

    # The capacity comes from total_gpu_memory; reading an unset name here
    # would make the threshold negative and reject every device forever.
    if [ "$useage" -ge "$((total_gpu_memory - max_gpu_memory_gap))" ] ||
      [ "$utilization" -ge "$max_gpu_utilization" ]; then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    else
      break
    fi
  done

  echo "current device: ${available_devices[$current_device_idx]}"
  device=${available_devices[$current_device_idx]}
}
|
res/20230615-17h50m22s/script.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Launch the single_user.py sweep: one background run per
# (poison_method, dataset_idx) pair, each gated by update_device_idx.

# Work from the directory holding this script so gpu_utility.sh and
# single_user.py resolve; stop instead of launching from the wrong place.
cd "$(dirname "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): update_device_idx leaves its pick in $device, but every run
# is still pinned to GPU 2 here -- confirm whether $device should be used.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
    # wait for CPU / memory / GPU headroom before starting the next run
    update_device_idx

    # argv is kept in an array so each argument stays one word without eval
    cmd=(
      python single_user.py
      --dataset_idx "$dataset_idx"
      --trigger_size 1
      --target 0
      --loc 0
      --batch_size 16
      --num_epochs 2
      --poison_method "$poison_method"
      --lr 5e-5
      --pattern 0
      --exp_name "$exp_name"
      --log_wb
    )
    "${cmd[@]}" &

    # stagger launches; sleeptime is set in gpu_utility.sh
    sleep "$sleeptime"
  done
done  # both loops are closed; a single "done" here is a syntax error
|
res/20230615-17h50m57s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
##### settings read by update_device_idx
username=yila22            # account inspected when username_mine is not set
max_gpu_utilization=90     # percent; a GPU at or above this is skipped
total_gpu_memory=24564     # per-GPU capacity, same unit as nvidia-smi memory.used
max_gpu_memory_gap=5000    # a GPU is skipped once used >= capacity - gap
available_devices=( 0 1 2 3 4 )
current_device_idx=-1      # index into available_devices; -1 = nothing picked yet
sleeptime=10               # not read in this file
cpu_mean_max=77            # percent
memory_rate_max=80         # percent
constrain_total=True
constrain_mine=True
constrain_rate=True        # divisor for the per-user limits (see below)

#######################################
# Block until the host has headroom, then pick the next usable GPU.
#
# Gates, applied in order (each one polls until it passes):
#   1. constrain_total: machine-wide CPU usage (mpstat) and memory usage
#      (free) must be below cpu_mean_max / memory_rate_max percent.
#   2. constrain_mine: CPU and resident memory summed over the user's own
#      processes must be below those limits divided by constrain_rate.
#   3. Round-robin over available_devices until one has used memory below
#      total_gpu_memory - max_gpu_memory_gap and utilization below
#      max_gpu_utilization.
#
# Globals read:    constrain_total, constrain_mine, constrain_rate,
#                  cpu_mean_max, memory_rate_max, username_mine, username,
#                  available_devices, total_gpu_memory, max_gpu_memory_gap,
#                  max_gpu_utilization
# Globals written: current_device_idx, device
# Outputs:         progress messages on stdout
# Returns:         1 if available_devices is empty, otherwise 0
#######################################
function update_device_idx {
  local i cpu_mean mem_rate
  local user rate mem_total
  local cpu_usage_user_ratio memory_usage_user_ratio
  local cpu_mean_max_mine memory_rate_max_mine
  local gpu useage utilization

  # The settings spell the flags "True"; accept "True" as well as "true" so
  # the gates are not silently skipped.
  if [[ "$constrain_total" == [Tt]rue ]]; then
    # check total cpu usage
    while true; do
      # Mean busy percentage (100 - last mpstat column) over the per-CPU
      # "Average:" rows of three one-second samples.
      cpu_mean=$(
        for i in 1 2 3; do
          mpstat -P ALL 1 1
          if (( i < 3 )); then sleep 1; fi
        done | awk '/Average:/ && $2 ~ /[0-9]/ { busy += 100 - $NF; rows++ }
                    END { if (rows) printf "%.2f\n", busy / rows }'
      )

      # An empty reading (mpstat produced no rows) counts as "not ready".
      if [[ -n "$cpu_mean" ]] &&
        awk -v a="$cpu_mean" -v b="$cpu_mean_max" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      else
        echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
        sleep 10
      fi
    done

    # check total memory usage
    while true; do
      # used / total from the "Mem:" row of free, over three samples.
      # Multiply before dividing so no precision is lost.
      mem_rate=$(
        for i in 1 2 3; do
          free -m
          if (( i < 3 )); then sleep 1; fi
        done | awk '/Mem:/ { used += $3; total += $2 }
                    END { if (total) printf "%.2f\n", used * 100 / total }'
      )

      if [[ -n "$mem_rate" ]] &&
        awk -v a="$mem_rate" -v b="$memory_rate_max" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      else
        echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
        sleep 10
      fi
    done
  fi

  # if constrain_mine
  if [[ "$constrain_mine" == [Tt]rue ]]; then
    # Kept in a local so the global username is never overwritten.
    user=${username_mine:-${username:-$(id -un)}}

    # constrain_rate must be a positive number to be usable as a divisor.
    # NOTE(review): anything else (e.g. "True") falls back to 2, matching the
    # "half of the given threshold" wording of the messages -- confirm.
    rate=$constrain_rate
    if ! awk -v r="$rate" 'BEGIN { exit !(r ~ /^[0-9]+([.][0-9]+)?$/ && r + 0 > 0) }'; then
      rate=2
    fi
    cpu_mean_max_mine=$(awk -v m="$cpu_mean_max" -v r="$rate" 'BEGIN { printf "%.2f\n", m / r }')
    memory_rate_max_mine=$(awk -v m="$memory_rate_max" -v r="$rate" 'BEGIN { printf "%.2f\n", m / r }')

    while true; do
      # Summed %cpu of the user's processes divided by the core count gives
      # the share of the whole machine in percent.
      cpu_usage_user_ratio=$(ps -u "$user" -o %cpu= |
        awk -v cores="$(nproc)" '{ sum += $1 }
             END { if (cores > 0) printf "%.2f\n", sum / cores }')

      # rss is summed and divided by 1024 to compare against free -m's total.
      mem_total=$(free -m | awk '/Mem:/ { print $2 }')
      memory_usage_user_ratio=$(ps -u "$user" -o rss= |
        awk -v total="$mem_total" '{ rss += $1 }
             END { if (total > 0) printf "%.2f\n", rss / 1024 * 100 / total }')

      if [[ -n "$cpu_usage_user_ratio" && -n "$memory_usage_user_ratio" ]] &&
        awk -v a="$cpu_usage_user_ratio" -v b="$cpu_mean_max_mine" 'BEGIN { exit !(a + 0 < b + 0) }' &&
        awk -v a="$memory_usage_user_ratio" -v b="$memory_rate_max_mine" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
        break
      else
        echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds"
        sleep 10
      fi
    done
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  if (( ${#available_devices[@]} == 0 )); then
    echo "available_devices is empty, cannot select a device" >&2
    return 1
  fi

  while true; do
    current_device_idx=$((current_device_idx + 1))
    if (( current_device_idx >= ${#available_devices[@]} )); then
      # reset
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    # check whether this device is fully booked using nvidia-smi
    useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")

    # The capacity comes from total_gpu_memory; reading an unset name here
    # would make the threshold negative and reject every device forever.
    if [ "$useage" -ge "$((total_gpu_memory - max_gpu_memory_gap))" ] ||
      [ "$utilization" -ge "$max_gpu_utilization" ]; then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    else
      break
    fi
  done

  echo "current device: ${available_devices[$current_device_idx]}"
  device=${available_devices[$current_device_idx]}
}
|
res/20230615-17h50m57s/script.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Launch the single_user.py sweep: one background run per
# (poison_method, dataset_idx) pair, each gated by update_device_idx.

# Work from the directory holding this script so gpu_utility.sh and
# single_user.py resolve; stop instead of launching from the wrong place.
cd "$(dirname "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# NOTE(review): update_device_idx leaves its pick in $device, but every run
# is still pinned to GPU 2 here -- confirm whether $device should be used.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
    # wait for CPU / memory / GPU headroom before starting the next run
    update_device_idx

    # argv is kept in an array so each argument stays one word without eval
    cmd=(
      python single_user.py
      --dataset_idx "$dataset_idx"
      --trigger_size 1
      --target 0
      --loc 0
      --batch_size 16
      --num_epochs 2
      --poison_method "$poison_method"
      --lr 5e-5
      --pattern 0
      --exp_name "$exp_name"
      --log_wb
    )
    "${cmd[@]}" &

    # stagger launches; sleeptime is set in gpu_utility.sh
    sleep "$sleeptime"
  done
done
|
res/20230615-17h51m33s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
username=yila22
|
| 3 |
+
max_gpu_utilization=90
|
| 4 |
+
total_gpu_memory=24564
|
| 5 |
+
max_gpu_memory_gap=5000
|
| 6 |
+
available_devices=( 0 1 2 3 4 )
|
| 7 |
+
current_device_idx=-1
|
| 8 |
+
sleeptime=10
|
| 9 |
+
cpu_mean_max=77
|
| 10 |
+
memory_rate_max=80
|
| 11 |
+
constrain_total=True
|
| 12 |
+
constrain_mine=True
|
| 13 |
+
constrain_rate=True
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Succeeds when $1 spells "true" in any letter case; the settings use
# Python-style booleans ("True"), which a plain `= true` test rejects.
_gpu_utility_is_true() {
  case "$1" in
    [Tt][Rr][Uu][Ee]) return 0 ;;
    *) return 1 ;;
  esac
}

# Succeeds when every named command exists; otherwise warns on stderr.
_gpu_utility_have() {
  local tool
  for tool in "$@"; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      echo "update_device_idx: '$tool' not found, skipping this check" >&2
      return 1
    fi
  done
  return 0
}

# Prints the busy percentage averaged over all CPUs for one 1-second sample.
_gpu_utility_cpu_busy() {
  LC_ALL=C mpstat -P ALL 1 1 | awk '
    /Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
    END { printf "%.2f\n", (count ? total / count : 0) }'
}

# Prints column $1 of the "Mem:" row of `free -m` (2 = total, 3 = used).
_gpu_utility_mem_field() {
  LC_ALL=C free -m | awk -v col="$1" '/Mem:/ { print $col }'
}

#######################################
# Wait until the host has spare capacity, then pick the next usable GPU.
#
# Stages, each polling until it passes:
#   1. system-wide CPU and memory load  (when constrain_total is true)
#   2. this user's CPU and memory share (when constrain_mine is true)
#   3. round-robin scan of available_devices for a GPU whose used memory is
#      below total_gpu_memory - max_gpu_memory_gap and whose utilization is
#      below max_gpu_utilization
#
# Globals read:    constrain_total, constrain_mine, constrain_rate,
#                  cpu_mean_max, memory_rate_max, username, username_mine,
#                  available_devices, total_gpu_memory, max_gpu_memory_gap,
#                  max_gpu_utilization
# Globals written: current_device_idx, device
# Outputs:         progress messages on stdout, warnings on stderr
# Returns:         0 once a device is selected, 1 if available_devices is empty
#######################################
function update_device_idx {
  local i cpu_sum cpu_mean mem_now mem_sum mem_total mem_rate
  local user rate cpu_user cpu_user_ratio mem_user mem_user_ratio
  local cpu_limit_mine mem_limit_mine gpu used busy mem_limit

  if (( ${#available_devices[@]} == 0 )); then
    echo "update_device_idx: available_devices is empty" >&2
    return 1
  fi

  if _gpu_utility_is_true "$constrain_total" \
    && _gpu_utility_have mpstat free awk bc; then
    # check total cpu usage (mean of three samples)
    while true; do
      cpu_sum=0
      for i in 1 2 3; do
        cpu_sum=$(echo "$cpu_sum + $(_gpu_utility_cpu_busy)" | bc)
        if (( i < 3 )); then sleep 1; fi
      done
      cpu_mean=$(echo "scale=2; $cpu_sum / 3" | bc)
      if [ "$(echo "$cpu_mean < $cpu_mean_max" | bc)" = 1 ]; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage (mean of three samples, as percent of total)
    while true; do
      mem_sum=0
      for i in 1 2 3; do
        mem_now=$(_gpu_utility_mem_field 3)
        mem_sum=$(( mem_sum + ${mem_now:-0} ))
        if (( i < 3 )); then sleep 1; fi
      done
      mem_total=$(_gpu_utility_mem_field 2)
      mem_rate=$(echo "scale=2; $mem_sum * 100 / (3 * ${mem_total:-1})" | bc)
      if [ "$(echo "$mem_rate < $memory_rate_max" | bc)" = 1 ]; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_utility_is_true "$constrain_mine" \
    && _gpu_utility_have ps free nproc awk bc; then
    # username_mine wins when a caller defines it; otherwise use the
    # configured username, and as a last resort the invoking user.
    user=${username_mine:-${username:-$(id -un)}}

    # The per-user limits are the global ceilings divided by constrain_rate.
    # Anything that is not a positive number falls back to 2 ("half").
    rate=$constrain_rate
    if ! [[ "$rate" =~ ^[0-9]+([.][0-9]+)?$ ]] \
      || [ "$(echo "$rate > 0" | bc)" != 1 ]; then
      rate=2
    fi
    cpu_limit_mine=$(echo "$cpu_mean_max / $rate" | bc)
    mem_limit_mine=$(echo "$memory_rate_max / $rate" | bc)

    while true; do
      # ps reports %cpu per core, so dividing the sum by the core count
      # yields the share of the whole machine in percent.
      cpu_user=$(ps -u "$user" -o %cpu= | awk '{ sum += $1 } END { printf "%.2f\n", sum }')
      cpu_user_ratio=$(echo "scale=2; $cpu_user / $(nproc)" | bc)

      # rss is in KiB; convert to MiB to match `free -m`.
      mem_user=$(ps -u "$user" -o rss= | awk '{ sum += $1 } END { printf "%.2f\n", sum / 1024 }')
      mem_total=$(_gpu_utility_mem_field 2)
      mem_user_ratio=$(echo "scale=2; $mem_user * 100 / ${mem_total:-1}" | bc)

      if [ "$(echo "$cpu_user_ratio < $cpu_limit_mine" | bc)" = 1 ] \
        && [ "$(echo "$mem_user_ratio < $mem_limit_mine" | bc)" = 1 ]; then
        echo "my cpu usage: $cpu_user_ratio, memory usage: $mem_user_ratio is less than 1/$rate of the given threshold for cpu: $cpu_limit_mine and memory: $mem_limit_mine, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_user_ratio, memory usage: $mem_user_ratio is greater than 1/$rate of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # All conditions are satisfied: advance to the next GPU that has room.
  # A GPU is full once its used memory reaches this limit.
  mem_limit=$(( total_gpu_memory - max_gpu_memory_gap ))
  while true; do
    current_device_idx=$(( current_device_idx + 1 ))
    if (( current_device_idx >= ${#available_devices[@]} )); then
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    busy=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")
    # Readings that are not plain integers (query failed, "[N/A]") count as
    # 0 so a device that cannot be queried does not stall the scan.
    [[ "$used" =~ ^[0-9]+$ ]] || used=0
    [[ "$busy" =~ ^[0-9]+$ ]] || busy=0

    if (( used >= mem_limit || busy >= max_gpu_utilization )); then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done

  echo "current device: $gpu"
  device=$gpu
}
|
res/20230615-17h51m33s/script.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Launcher: sweeps poison_method x dataset_idx and starts one background
# training job per combination once update_device_idx has picked a GPU.

# Work from this script's directory; stop if the directory or the helper
# cannot be loaded instead of launching jobs from the wrong place.
cd "$(dirname -- "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# Fallback GPU; each job below is pinned to the device chosen for it.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
for dataset_idx in 0 1 2; do
#####

# Waits for free resources and stores the selected GPU index in $device.
update_device_idx

# Run on the GPU that was just selected rather than on the fixed default.
CUDA_VISIBLE_DEVICES="${device:-$CUDA_VISIBLE_DEVICES}" \
  python single_user.py --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 --batch_size 16 --num_epochs 2 --poison_method "$poison_method" --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb &

# Pause before probing again (presumably so the new job shows up in the
# GPU statistics -- confirm).
sleep "$sleeptime"

#####
#####
done;done;
|
res/20230615-17h51m43s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Settings read by update_device_idx (defined below) and by the launcher.

# Account name intended for the per-user CPU / memory measurements.
username=yila22
# A GPU whose utilization (percent) reaches this value is treated as busy.
max_gpu_utilization=90
# Memory of one GPU; presumably MiB, the unit nvidia-smi reports -- confirm.
total_gpu_memory=24564
# Head-room that must stay free on a GPU: used memory is compared against
# (total - gap).
max_gpu_memory_gap=5000
# GPU indices that may be handed out; they are tried in round-robin order.
available_devices=( 0 1 2 3 4 )
# Position in available_devices of the last GPU tried (-1: none yet).
current_device_idx=-1
# Seconds the launcher sleeps after starting a job.
sleeptime=10
# Ceilings (percent) for system-wide CPU load and memory use.
cpu_mean_max=77
memory_rate_max=80
# Switches for the system-wide and the per-user checks.
constrain_total=True
constrain_mine=True
# Divisor applied to the two ceilings to get the per-user limits.  It must be
# a number because it is used in a division; 2 gives "half of the threshold",
# which is what the log messages of update_device_idx announce.
constrain_rate=2
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Succeeds when $1 spells "true" in any letter case; the settings use
# Python-style booleans ("True"), which a plain `= true` test rejects.
_gpu_utility_is_true() {
  case "$1" in
    [Tt][Rr][Uu][Ee]) return 0 ;;
    *) return 1 ;;
  esac
}

# Succeeds when every named command exists; otherwise warns on stderr.
_gpu_utility_have() {
  local tool
  for tool in "$@"; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      echo "update_device_idx: '$tool' not found, skipping this check" >&2
      return 1
    fi
  done
  return 0
}

# Prints the busy percentage averaged over all CPUs for one 1-second sample.
_gpu_utility_cpu_busy() {
  LC_ALL=C mpstat -P ALL 1 1 | awk '
    /Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
    END { printf "%.2f\n", (count ? total / count : 0) }'
}

# Prints column $1 of the "Mem:" row of `free -m` (2 = total, 3 = used).
_gpu_utility_mem_field() {
  LC_ALL=C free -m | awk -v col="$1" '/Mem:/ { print $col }'
}

#######################################
# Wait until the host has spare capacity, then pick the next usable GPU.
#
# Stages, each polling until it passes:
#   1. system-wide CPU and memory load  (when constrain_total is true)
#   2. this user's CPU and memory share (when constrain_mine is true)
#   3. round-robin scan of available_devices for a GPU whose used memory is
#      below total_gpu_memory - max_gpu_memory_gap and whose utilization is
#      below max_gpu_utilization
#
# Globals read:    constrain_total, constrain_mine, constrain_rate,
#                  cpu_mean_max, memory_rate_max, username, username_mine,
#                  available_devices, total_gpu_memory, max_gpu_memory_gap,
#                  max_gpu_utilization
# Globals written: current_device_idx, device
# Outputs:         progress messages on stdout, warnings on stderr
# Returns:         0 once a device is selected, 1 if available_devices is empty
#######################################
function update_device_idx {
  local i cpu_sum cpu_mean mem_now mem_sum mem_total mem_rate
  local user rate cpu_user cpu_user_ratio mem_user mem_user_ratio
  local cpu_limit_mine mem_limit_mine gpu used busy mem_limit

  if (( ${#available_devices[@]} == 0 )); then
    echo "update_device_idx: available_devices is empty" >&2
    return 1
  fi

  if _gpu_utility_is_true "$constrain_total" \
    && _gpu_utility_have mpstat free awk bc; then
    # check total cpu usage (mean of three samples)
    while true; do
      cpu_sum=0
      for i in 1 2 3; do
        cpu_sum=$(echo "$cpu_sum + $(_gpu_utility_cpu_busy)" | bc)
        if (( i < 3 )); then sleep 1; fi
      done
      cpu_mean=$(echo "scale=2; $cpu_sum / 3" | bc)
      if [ "$(echo "$cpu_mean < $cpu_mean_max" | bc)" = 1 ]; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage (mean of three samples, as percent of total)
    while true; do
      mem_sum=0
      for i in 1 2 3; do
        mem_now=$(_gpu_utility_mem_field 3)
        mem_sum=$(( mem_sum + ${mem_now:-0} ))
        if (( i < 3 )); then sleep 1; fi
      done
      mem_total=$(_gpu_utility_mem_field 2)
      mem_rate=$(echo "scale=2; $mem_sum * 100 / (3 * ${mem_total:-1})" | bc)
      if [ "$(echo "$mem_rate < $memory_rate_max" | bc)" = 1 ]; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_utility_is_true "$constrain_mine" \
    && _gpu_utility_have ps free nproc awk bc; then
    # username_mine wins when a caller defines it; otherwise use the
    # configured username, and as a last resort the invoking user.
    user=${username_mine:-${username:-$(id -un)}}

    # The per-user limits are the global ceilings divided by constrain_rate.
    # Anything that is not a positive number falls back to 2 ("half").
    rate=$constrain_rate
    if ! [[ "$rate" =~ ^[0-9]+([.][0-9]+)?$ ]] \
      || [ "$(echo "$rate > 0" | bc)" != 1 ]; then
      rate=2
    fi
    cpu_limit_mine=$(echo "$cpu_mean_max / $rate" | bc)
    mem_limit_mine=$(echo "$memory_rate_max / $rate" | bc)

    while true; do
      # ps reports %cpu per core, so dividing the sum by the core count
      # yields the share of the whole machine in percent.
      cpu_user=$(ps -u "$user" -o %cpu= | awk '{ sum += $1 } END { printf "%.2f\n", sum }')
      cpu_user_ratio=$(echo "scale=2; $cpu_user / $(nproc)" | bc)

      # rss is in KiB; convert to MiB to match `free -m`.
      mem_user=$(ps -u "$user" -o rss= | awk '{ sum += $1 } END { printf "%.2f\n", sum / 1024 }')
      mem_total=$(_gpu_utility_mem_field 2)
      mem_user_ratio=$(echo "scale=2; $mem_user * 100 / ${mem_total:-1}" | bc)

      if [ "$(echo "$cpu_user_ratio < $cpu_limit_mine" | bc)" = 1 ] \
        && [ "$(echo "$mem_user_ratio < $mem_limit_mine" | bc)" = 1 ]; then
        echo "my cpu usage: $cpu_user_ratio, memory usage: $mem_user_ratio is less than 1/$rate of the given threshold for cpu: $cpu_limit_mine and memory: $mem_limit_mine, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_user_ratio, memory usage: $mem_user_ratio is greater than 1/$rate of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # All conditions are satisfied: advance to the next GPU that has room.
  # A GPU is full once its used memory reaches this limit.
  mem_limit=$(( total_gpu_memory - max_gpu_memory_gap ))
  while true; do
    current_device_idx=$(( current_device_idx + 1 ))
    if (( current_device_idx >= ${#available_devices[@]} )); then
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    busy=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")
    # Readings that are not plain integers (query failed, "[N/A]") count as
    # 0 so a device that cannot be queried does not stall the scan.
    [[ "$used" =~ ^[0-9]+$ ]] || used=0
    [[ "$busy" =~ ^[0-9]+$ ]] || busy=0

    if (( used >= mem_limit || busy >= max_gpu_utilization )); then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done

  echo "current device: $gpu"
  device=$gpu
}
|
res/20230615-17h51m43s/script.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Launcher: sweeps poison_method x dataset_idx and starts one background
# training job per combination once update_device_idx has picked a GPU.

# Work from this script's directory; stop if the directory or the helper
# cannot be loaded instead of launching jobs from the wrong place.
cd "$(dirname -- "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# Fallback GPU; each job below is pinned to the device chosen for it.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
for dataset_idx in 0 1 2; do
#####

# Waits for free resources and stores the selected GPU index in $device.
update_device_idx

# Run on the GPU that was just selected rather than on the fixed default.
CUDA_VISIBLE_DEVICES="${device:-$CUDA_VISIBLE_DEVICES}" \
  python single_user.py --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 --batch_size 16 --num_epochs 2 --poison_method "$poison_method" --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb &

# Pause before probing again (presumably so the new job shows up in the
# GPU statistics -- confirm).
sleep "$sleeptime"

#####
#####
done;done;
|
res/20230615-17h53m28s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Settings read by update_device_idx (defined below) and by the launcher.

# Account name intended for the per-user CPU / memory measurements.
username=yila22
# A GPU whose utilization (percent) reaches this value is treated as busy.
max_gpu_utilization=90
# Memory of one GPU; presumably MiB, the unit nvidia-smi reports -- confirm.
total_gpu_memory=24564
# Head-room that must stay free on a GPU: used memory is compared against
# (total - gap).
max_gpu_memory_gap=5000
# GPU indices that may be handed out; they are tried in round-robin order.
available_devices=( 0 1 2 3 4 )
# Position in available_devices of the last GPU tried (-1: none yet).
current_device_idx=-1
# Seconds the launcher sleeps after starting a job.
sleeptime=10
# Ceilings (percent) for system-wide CPU load and memory use.
cpu_mean_max=77
memory_rate_max=80
# Switches for the system-wide and the per-user checks.
constrain_total=True
constrain_mine=True
# Divisor applied to the two ceilings to get the per-user limits.  It must be
# a number because it is used in a division; 2 gives "half of the threshold",
# which is what the log messages of update_device_idx announce.
constrain_rate=2
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Succeeds when $1 spells "true" in any letter case; the settings use
# Python-style booleans ("True"), which a plain `= true` test rejects.
_gpu_utility_is_true() {
  case "$1" in
    [Tt][Rr][Uu][Ee]) return 0 ;;
    *) return 1 ;;
  esac
}

# Succeeds when every named command exists; otherwise warns on stderr.
_gpu_utility_have() {
  local tool
  for tool in "$@"; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      echo "update_device_idx: '$tool' not found, skipping this check" >&2
      return 1
    fi
  done
  return 0
}

# Prints the busy percentage averaged over all CPUs for one 1-second sample.
_gpu_utility_cpu_busy() {
  LC_ALL=C mpstat -P ALL 1 1 | awk '
    /Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
    END { printf "%.2f\n", (count ? total / count : 0) }'
}

# Prints column $1 of the "Mem:" row of `free -m` (2 = total, 3 = used).
_gpu_utility_mem_field() {
  LC_ALL=C free -m | awk -v col="$1" '/Mem:/ { print $col }'
}

#######################################
# Wait until the host has spare capacity, then pick the next usable GPU.
#
# Stages, each polling until it passes:
#   1. system-wide CPU and memory load  (when constrain_total is true)
#   2. this user's CPU and memory share (when constrain_mine is true)
#   3. round-robin scan of available_devices for a GPU whose used memory is
#      below total_gpu_memory - max_gpu_memory_gap and whose utilization is
#      below max_gpu_utilization
#
# Globals read:    constrain_total, constrain_mine, constrain_rate,
#                  cpu_mean_max, memory_rate_max, username, username_mine,
#                  available_devices, total_gpu_memory, max_gpu_memory_gap,
#                  max_gpu_utilization
# Globals written: current_device_idx, device
# Outputs:         progress messages on stdout, warnings on stderr
# Returns:         0 once a device is selected, 1 if available_devices is empty
#######################################
function update_device_idx {
  local i cpu_sum cpu_mean mem_now mem_sum mem_total mem_rate
  local user rate cpu_user cpu_user_ratio mem_user mem_user_ratio
  local cpu_limit_mine mem_limit_mine gpu used busy mem_limit

  if (( ${#available_devices[@]} == 0 )); then
    echo "update_device_idx: available_devices is empty" >&2
    return 1
  fi

  if _gpu_utility_is_true "$constrain_total" \
    && _gpu_utility_have mpstat free awk bc; then
    # check total cpu usage (mean of three samples)
    while true; do
      cpu_sum=0
      for i in 1 2 3; do
        cpu_sum=$(echo "$cpu_sum + $(_gpu_utility_cpu_busy)" | bc)
        if (( i < 3 )); then sleep 1; fi
      done
      cpu_mean=$(echo "scale=2; $cpu_sum / 3" | bc)
      if [ "$(echo "$cpu_mean < $cpu_mean_max" | bc)" = 1 ]; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage (mean of three samples, as percent of total)
    while true; do
      mem_sum=0
      for i in 1 2 3; do
        mem_now=$(_gpu_utility_mem_field 3)
        mem_sum=$(( mem_sum + ${mem_now:-0} ))
        if (( i < 3 )); then sleep 1; fi
      done
      mem_total=$(_gpu_utility_mem_field 2)
      mem_rate=$(echo "scale=2; $mem_sum * 100 / (3 * ${mem_total:-1})" | bc)
      if [ "$(echo "$mem_rate < $memory_rate_max" | bc)" = 1 ]; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_utility_is_true "$constrain_mine" \
    && _gpu_utility_have ps free nproc awk bc; then
    # username_mine wins when a caller defines it; otherwise use the
    # configured username, and as a last resort the invoking user.
    user=${username_mine:-${username:-$(id -un)}}

    # The per-user limits are the global ceilings divided by constrain_rate.
    # Anything that is not a positive number falls back to 2 ("half").
    rate=$constrain_rate
    if ! [[ "$rate" =~ ^[0-9]+([.][0-9]+)?$ ]] \
      || [ "$(echo "$rate > 0" | bc)" != 1 ]; then
      rate=2
    fi
    cpu_limit_mine=$(echo "$cpu_mean_max / $rate" | bc)
    mem_limit_mine=$(echo "$memory_rate_max / $rate" | bc)

    while true; do
      # ps reports %cpu per core, so dividing the sum by the core count
      # yields the share of the whole machine in percent.
      cpu_user=$(ps -u "$user" -o %cpu= | awk '{ sum += $1 } END { printf "%.2f\n", sum }')
      cpu_user_ratio=$(echo "scale=2; $cpu_user / $(nproc)" | bc)

      # rss is in KiB; convert to MiB to match `free -m`.
      mem_user=$(ps -u "$user" -o rss= | awk '{ sum += $1 } END { printf "%.2f\n", sum / 1024 }')
      mem_total=$(_gpu_utility_mem_field 2)
      mem_user_ratio=$(echo "scale=2; $mem_user * 100 / ${mem_total:-1}" | bc)

      if [ "$(echo "$cpu_user_ratio < $cpu_limit_mine" | bc)" = 1 ] \
        && [ "$(echo "$mem_user_ratio < $mem_limit_mine" | bc)" = 1 ]; then
        echo "my cpu usage: $cpu_user_ratio, memory usage: $mem_user_ratio is less than 1/$rate of the given threshold for cpu: $cpu_limit_mine and memory: $mem_limit_mine, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_user_ratio, memory usage: $mem_user_ratio is greater than 1/$rate of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # All conditions are satisfied: advance to the next GPU that has room.
  # A GPU is full once its used memory reaches this limit.
  mem_limit=$(( total_gpu_memory - max_gpu_memory_gap ))
  while true; do
    current_device_idx=$(( current_device_idx + 1 ))
    if (( current_device_idx >= ${#available_devices[@]} )); then
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    busy=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")
    # Readings that are not plain integers (query failed, "[N/A]") count as
    # 0 so a device that cannot be queried does not stall the scan.
    [[ "$used" =~ ^[0-9]+$ ]] || used=0
    [[ "$busy" =~ ^[0-9]+$ ]] || busy=0

    if (( used >= mem_limit || busy >= max_gpu_utilization )); then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done

  echo "current device: $gpu"
  device=$gpu
}
|
res/20230615-17h53m28s/script.sh
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Launcher: sweeps poison_method x dataset_idx and runs the training jobs one
# after another, each after update_device_idx has picked a GPU.

# Work from this script's directory; stop if the directory or the helper
# cannot be loaded instead of launching jobs from the wrong place.
cd "$(dirname -- "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# Fallback GPU; each job below is pinned to the device chosen for it.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
for dataset_idx in 0 1 2; do
#####

# Waits for free resources and stores the selected GPU index in $device.
update_device_idx

# Run (in the foreground) on the GPU that was just selected rather than on
# the fixed default.
CUDA_VISIBLE_DEVICES="${device:-$CUDA_VISIBLE_DEVICES}" \
  python single_user.py --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 --batch_size 16 --num_epochs 2 --poison_method "$poison_method" --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb

sleep "$sleeptime"

#####

#####
done;done;
|
res/20230615-17h53m44s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Settings read by update_device_idx (defined below) and by the launcher.

# Account name intended for the per-user CPU / memory measurements.
username=yila22
# A GPU whose utilization (percent) reaches this value is treated as busy.
max_gpu_utilization=90
# Memory of one GPU; presumably MiB, the unit nvidia-smi reports -- confirm.
total_gpu_memory=24564
# Head-room that must stay free on a GPU: used memory is compared against
# (total - gap).
max_gpu_memory_gap=5000
# GPU indices that may be handed out; they are tried in round-robin order.
available_devices=( 0 1 2 3 4 )
# Position in available_devices of the last GPU tried (-1: none yet).
current_device_idx=-1
# Seconds the launcher sleeps after starting a job.
sleeptime=10
# Ceilings (percent) for system-wide CPU load and memory use.
cpu_mean_max=77
memory_rate_max=80
# Switches for the system-wide and the per-user checks.
constrain_total=True
constrain_mine=True
# Divisor applied to the two ceilings to get the per-user limits.  It must be
# a number because it is used in a division; 2 gives "half of the threshold",
# which is what the log messages of update_device_idx announce.
constrain_rate=2
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Succeeds when $1 spells "true" in any letter case; the settings use
# Python-style booleans ("True"), which a plain `= true` test rejects.
_gpu_utility_is_true() {
  case "$1" in
    [Tt][Rr][Uu][Ee]) return 0 ;;
    *) return 1 ;;
  esac
}

# Succeeds when every named command exists; otherwise warns on stderr.
_gpu_utility_have() {
  local tool
  for tool in "$@"; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      echo "update_device_idx: '$tool' not found, skipping this check" >&2
      return 1
    fi
  done
  return 0
}

# Prints the busy percentage averaged over all CPUs for one 1-second sample.
_gpu_utility_cpu_busy() {
  LC_ALL=C mpstat -P ALL 1 1 | awk '
    /Average:/ && $2 ~ /[0-9]/ { total += 100 - $NF; count++ }
    END { printf "%.2f\n", (count ? total / count : 0) }'
}

# Prints column $1 of the "Mem:" row of `free -m` (2 = total, 3 = used).
_gpu_utility_mem_field() {
  LC_ALL=C free -m | awk -v col="$1" '/Mem:/ { print $col }'
}

#######################################
# Wait until the host has spare capacity, then pick the next usable GPU.
#
# Stages, each polling until it passes:
#   1. system-wide CPU and memory load  (when constrain_total is true)
#   2. this user's CPU and memory share (when constrain_mine is true)
#   3. round-robin scan of available_devices for a GPU whose used memory is
#      below total_gpu_memory - max_gpu_memory_gap and whose utilization is
#      below max_gpu_utilization
#
# Globals read:    constrain_total, constrain_mine, constrain_rate,
#                  cpu_mean_max, memory_rate_max, username, username_mine,
#                  available_devices, total_gpu_memory, max_gpu_memory_gap,
#                  max_gpu_utilization
# Globals written: current_device_idx, device
# Outputs:         progress messages on stdout, warnings on stderr
# Returns:         0 once a device is selected, 1 if available_devices is empty
#######################################
function update_device_idx {
  local i cpu_sum cpu_mean mem_now mem_sum mem_total mem_rate
  local user rate cpu_user cpu_user_ratio mem_user mem_user_ratio
  local cpu_limit_mine mem_limit_mine gpu used busy mem_limit

  if (( ${#available_devices[@]} == 0 )); then
    echo "update_device_idx: available_devices is empty" >&2
    return 1
  fi

  if _gpu_utility_is_true "$constrain_total" \
    && _gpu_utility_have mpstat free awk bc; then
    # check total cpu usage (mean of three samples)
    while true; do
      cpu_sum=0
      for i in 1 2 3; do
        cpu_sum=$(echo "$cpu_sum + $(_gpu_utility_cpu_busy)" | bc)
        if (( i < 3 )); then sleep 1; fi
      done
      cpu_mean=$(echo "scale=2; $cpu_sum / 3" | bc)
      if [ "$(echo "$cpu_mean < $cpu_mean_max" | bc)" = 1 ]; then
        echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
        break
      fi
      echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
      sleep 10
    done

    # check total memory usage (mean of three samples, as percent of total)
    while true; do
      mem_sum=0
      for i in 1 2 3; do
        mem_now=$(_gpu_utility_mem_field 3)
        mem_sum=$(( mem_sum + ${mem_now:-0} ))
        if (( i < 3 )); then sleep 1; fi
      done
      mem_total=$(_gpu_utility_mem_field 2)
      mem_rate=$(echo "scale=2; $mem_sum * 100 / (3 * ${mem_total:-1})" | bc)
      if [ "$(echo "$mem_rate < $memory_rate_max" | bc)" = 1 ]; then
        echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
        break
      fi
      echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
      sleep 10
    done
  fi

  if _gpu_utility_is_true "$constrain_mine" \
    && _gpu_utility_have ps free nproc awk bc; then
    # username_mine wins when a caller defines it; otherwise use the
    # configured username, and as a last resort the invoking user.
    user=${username_mine:-${username:-$(id -un)}}

    # The per-user limits are the global ceilings divided by constrain_rate.
    # Anything that is not a positive number falls back to 2 ("half").
    rate=$constrain_rate
    if ! [[ "$rate" =~ ^[0-9]+([.][0-9]+)?$ ]] \
      || [ "$(echo "$rate > 0" | bc)" != 1 ]; then
      rate=2
    fi
    cpu_limit_mine=$(echo "$cpu_mean_max / $rate" | bc)
    mem_limit_mine=$(echo "$memory_rate_max / $rate" | bc)

    while true; do
      # ps reports %cpu per core, so dividing the sum by the core count
      # yields the share of the whole machine in percent.
      cpu_user=$(ps -u "$user" -o %cpu= | awk '{ sum += $1 } END { printf "%.2f\n", sum }')
      cpu_user_ratio=$(echo "scale=2; $cpu_user / $(nproc)" | bc)

      # rss is in KiB; convert to MiB to match `free -m`.
      mem_user=$(ps -u "$user" -o rss= | awk '{ sum += $1 } END { printf "%.2f\n", sum / 1024 }')
      mem_total=$(_gpu_utility_mem_field 2)
      mem_user_ratio=$(echo "scale=2; $mem_user * 100 / ${mem_total:-1}" | bc)

      if [ "$(echo "$cpu_user_ratio < $cpu_limit_mine" | bc)" = 1 ] \
        && [ "$(echo "$mem_user_ratio < $mem_limit_mine" | bc)" = 1 ]; then
        echo "my cpu usage: $cpu_user_ratio, memory usage: $mem_user_ratio is less than 1/$rate of the given threshold for cpu: $cpu_limit_mine and memory: $mem_limit_mine, ready to take off"
        break
      fi
      echo "my cpu usage: $cpu_user_ratio, memory usage: $mem_user_ratio is greater than 1/$rate of the given threshold, sleep 10 seconds"
      sleep 10
    done
  fi

  # All conditions are satisfied: advance to the next GPU that has room.
  # A GPU is full once its used memory reaches this limit.
  mem_limit=$(( total_gpu_memory - max_gpu_memory_gap ))
  while true; do
    current_device_idx=$(( current_device_idx + 1 ))
    if (( current_device_idx >= ${#available_devices[@]} )); then
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    busy=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")
    # Readings that are not plain integers (query failed, "[N/A]") count as
    # 0 so a device that cannot be queried does not stall the scan.
    [[ "$used" =~ ^[0-9]+$ ]] || used=0
    [[ "$busy" =~ ^[0-9]+$ ]] || busy=0

    if (( used >= mem_limit || busy >= max_gpu_utilization )); then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    fi
    break
  done

  echo "current device: $gpu"
  device=$gpu
}
|
res/20230615-17h53m44s/script.sh
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Launcher: sweeps poison_method x dataset_idx and runs the training jobs one
# after another, each after update_device_idx has picked a GPU.

# Work from this script's directory; stop if the directory or the helper
# cannot be loaded instead of launching jobs from the wrong place.
cd "$(dirname -- "$0")" || exit 1
source gpu_utility.sh || exit 1

##### setup
# Fallback GPU; each job below is pinned to the device chosen for it.
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
for dataset_idx in 0 1 2; do
#####

# Waits for free resources and stores the selected GPU index in $device.
update_device_idx

# Run (in the foreground) on the GPU that was just selected rather than on
# the fixed default.
CUDA_VISIBLE_DEVICES="${device:-$CUDA_VISIBLE_DEVICES}" \
  python single_user.py --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 --batch_size 16 --num_epochs 2 --poison_method "$poison_method" --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb

sleep "$sleeptime"

#####

#####
done;done;
|
res/20230615-17h55m17s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Settings read by update_device_idx (defined below) and by the launcher.

# Account name intended for the per-user CPU / memory measurements.
username=yila22
# A GPU whose utilization (percent) reaches this value is treated as busy.
max_gpu_utilization=90
# Memory of one GPU; presumably MiB, the unit nvidia-smi reports -- confirm.
total_gpu_memory=24564
# Head-room that must stay free on a GPU: used memory is compared against
# (total - gap).
max_gpu_memory_gap=5000
# GPU indices that may be handed out; they are tried in round-robin order.
available_devices=( 0 1 2 3 4 )
# Position in available_devices of the last GPU tried (-1: none yet).
current_device_idx=-1
# Seconds the launcher sleeps after starting a job.
sleeptime=10
# Ceilings (percent) for system-wide CPU load and memory use.
cpu_mean_max=77
memory_rate_max=80
# Switches for the system-wide and the per-user checks.
constrain_total=True
constrain_mine=True
# Divisor applied to the two ceilings to get the per-user limits.  It must be
# a number because it is used in a division; 2 gives "half of the threshold",
# which is what the log messages of update_device_idx announce.
constrain_rate=2
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
function update_device_idx {
|
| 18 |
+
if [ $constrain_total = true ]; then
|
| 19 |
+
# check total cpu usage
|
| 20 |
+
while true; do
|
| 21 |
+
cpu_mean_1=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
|
| 22 |
+
sleep 1
|
| 23 |
+
cpu_mean_2=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
|
| 24 |
+
sleep 1
|
| 25 |
+
cpu_mean_3=$(mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }')
|
| 26 |
+
cpu_mean=$(echo "scale=2; ($cpu_mean_1+$cpu_mean_2+$cpu_mean_3)/3" | bc)
|
| 27 |
+
|
| 28 |
+
# if currently cpu usage is less than the threshold, then break
|
| 29 |
+
if [ $(echo "$cpu_mean < $cpu_mean_max" | bc) -eq 1 ]; then
|
| 30 |
+
echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
|
| 31 |
+
break
|
| 32 |
+
else
|
| 33 |
+
echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
|
| 34 |
+
sleep 10
|
| 35 |
+
fi
|
| 36 |
+
done;
|
| 37 |
+
|
| 38 |
+
# check total memory usage
|
| 39 |
+
while true; do
|
| 40 |
+
# get memory usage of whole system
|
| 41 |
+
mem_used_1=$(free -m | awk '/Mem:/ {print $3}')
|
| 42 |
+
sleep 1
|
| 43 |
+
mem_used_2=$(free -m | awk '/Mem:/ {print $3}')
|
| 44 |
+
sleep 1
|
| 45 |
+
mem_used_3=$(free -m | awk '/Mem:/ {print $3}')
|
| 46 |
+
mem_used=$(echo "scale=2; ($mem_used_1+$mem_used_2+$mem_used_3)/3" | bc)
|
| 47 |
+
|
| 48 |
+
# echo $mem_used
|
| 49 |
+
# get rate of memory usage
|
| 50 |
+
mem_rate=$(echo "scale=2; $mem_used/$(free -m | awk '/Mem:/ {print $2}')*100" | bc)
|
| 51 |
+
# echo $mem_rate
|
| 52 |
+
if [ $(echo "$mem_rate < $memory_rate_max" | bc) -eq 1 ]; then
|
| 53 |
+
echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
|
| 54 |
+
break
|
| 55 |
+
else
|
| 56 |
+
echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
|
| 57 |
+
sleep 10
|
| 58 |
+
fi
|
| 59 |
+
done;
|
| 60 |
+
fi;
|
| 61 |
+
|
| 62 |
+
# if constrain_mine
|
| 63 |
+
if [ $constrain_mine = true ]; then
|
| 64 |
+
|
| 65 |
+
# check my own cpu and memory usage, it should be less than 1/$constrain_rate of the given cpu_mean_max / memory_rate_max
|
| 66 |
+
while true; do
|
| 67 |
+
username=$username_mine
|
| 68 |
+
cpu_usage_user_sum=$(ps -u $username -o %cpu | awk '{sum+=$1} END {print sum}')
|
| 69 |
+
# echo $cpu_usage_user_sum
|
| 70 |
+
total_aviable_cpu=$(nproc)
|
| 71 |
+
total_aviable_cpu=$(echo "$total_aviable_cpu*100" | bc)
|
| 72 |
+
# echo $total_aviable_cpu
|
| 73 |
+
cpu_usage_user_ratio=$(echo "scale=2; $cpu_usage_user_sum/$total_aviable_cpu*100" | bc)
|
| 74 |
+
# echo $cpu_usage_user_ratio
|
| 75 |
+
|
| 76 |
+
memory_usage_user_sum=$(ps -u $username -o rss | awk '{sum+=$1} END {print sum/1024}')
|
| 77 |
+
# echo $memory_usage_user_sum
|
| 78 |
+
memory_usage_total=$(free -m | awk '/Mem:/ {print $2}')
|
| 79 |
+
# echo $memory_usage_total
|
| 80 |
+
memory_usage_user_ratio=$(echo "scale=2; $memory_usage_user_sum/$memory_usage_total*100" | bc)
|
| 81 |
+
# echo $memory_usage_user_ratio
|
| 82 |
+
|
| 83 |
+
# so my ratio should be less than 1/$constrain_rate of the given threshold
|
| 84 |
+
cpu_mean_max_mine=$(echo "$cpu_mean_max/$constrain_rate" | bc)
|
| 85 |
+
memory_rate_max_mine=$(echo "$memory_rate_max/$constrain_rate" | bc)
|
| 86 |
+
if [ $(echo "$cpu_usage_user_ratio < $cpu_mean_max_mine" | bc) -eq 1 ] && [ $(echo "$memory_usage_user_ratio < $memory_rate_max_mine" | bc) -eq 1 ]; then
|
| 87 |
+
echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is less than half of the given threshold for cpu: $cpu_mean_max_mine and memory: $memory_rate_max_mine, ready to take off"
|
| 88 |
+
break
|
| 89 |
+
else
|
| 90 |
+
echo "my cpu usage: $cpu_usage_user_ratio, memory usage: $memory_usage_user_ratio is greater than half of the given threshold, sleep 10 seconds"
|
| 91 |
+
sleep 10
|
| 92 |
+
fi
|
| 93 |
+
done;
|
| 94 |
+
fi;
|
| 95 |
+
|
| 96 |
+
# so all the conditions are satisfied, we can update the device idx and run the next experiment
|
| 97 |
+
while true; do
|
| 98 |
+
current_device_idx=$((current_device_idx+1))
|
| 99 |
+
if [ $current_device_idx -ge ${#available_devices[@]} ]; then
|
| 100 |
+
# reset
|
| 101 |
+
current_device_idx=0
|
| 102 |
+
fi
|
| 103 |
+
# check whether this device is fully booked using nvidia-smi
|
| 104 |
+
# get the gpu current memory usage
|
| 105 |
+
useage=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})
|
| 106 |
+
utilization=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i ${available_devices[$current_device_idx]})
|
| 107 |
+
|
| 108 |
+
if [ $useage -ge $((total_aviable-max_gpu_memory_gap)) ] || [ $utilization -ge $max_gpu_utilization ]; then
|
| 109 |
+
echo "device ${available_devices[$current_device_idx]} is fully booked, try next one"
|
| 110 |
+
sleep 3
|
| 111 |
+
continue
|
| 112 |
+
else
|
| 113 |
+
break
|
| 114 |
+
fi
|
| 115 |
+
done
|
| 116 |
+
echo "current device: ${available_devices[$current_device_idx]}"
|
| 117 |
+
device=${available_devices[$current_device_idx]}
|
| 118 |
+
}
|
res/20230615-17h55m17s/script.sh
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Sweep driver: runs single_user.py once per (poison_method, dataset_idx) pair,
# calling update_device_idx (from gpu_utility.sh) before every run.
# NOTE(review): the "#####" lines look like template delimiters; they are kept
# verbatim and at column 0.

# Work from the directory that holds this script so the relative paths below
# resolve; stop early if that directory or the helper file is unavailable.
cd "$(cd "$(dirname "$0")" && pwd)" || exit 1
source gpu_utility.sh || exit 1

##### setup
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
#####

    # NOTE(review): update_device_idx leaves its pick in $device, but the run
    # below still relies on the fixed CUDA_VISIBLE_DEVICES exported above;
    # confirm which of the two is intended.
    update_device_idx

    python single_user.py --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 --batch_size 16 --num_epochs 2 --poison_method "$poison_method" --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb

    # sleeptime is defined in gpu_utility.sh.
    sleep "$sleeptime"

#####

#####
  done
done
|
res/20230615-17h56m22s/gpu_utility.sh
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# Resource gate for launching GPU experiments. Meant to be sourced; call
# update_device_idx before each run. It waits until the enabled CPU/memory
# limits are satisfied and then leaves the id of a non-busy GPU in $device.

##### settings
username=yila22                  # account inspected when constrain_mine is on
max_gpu_utilization=90           # a GPU at/above this utilization is treated as busy
total_gpu_memory=24564           # per-GPU memory, same unit as nvidia-smi memory.used
max_gpu_memory_gap=5000          # a GPU is busy once used memory >= total - gap
available_devices=( 0 1 2 3 4 )  # GPU indices to rotate through
current_device_idx=-1            # position in available_devices; -1 = nothing picked yet
sleeptime=10                     # not read in this file; presumably used by the sourcing script
cpu_mean_max=77                  # upper bound for the mean system-wide CPU usage (%)
memory_rate_max=80               # upper bound for the system-wide memory usage (%)
constrain_total=True             # wait on the system-wide CPU / memory limits
constrain_mine=False             # additionally wait on this user's own CPU / memory usage
constrain_rate=2                 # own usage must stay below limit / constrain_rate

# Succeeds when $1 spells a true flag (true / True / TRUE).
_gpu_util_enabled() {
  case "$1" in
    true | True | TRUE) return 0 ;;
    *) return 1 ;;
  esac
}

# Prints the mean usage (%) across all CPUs from one 1-second mpstat sample.
_gpu_util_cpu_sample() {
  mpstat -P ALL 1 1 | awk '/Average:/ && $2 ~ /[0-9]/ { cpu_usage=100-$NF; total+=cpu_usage; count++ } END { print total/count }'
}

# Blocks until system-wide CPU usage and then memory usage are below
# cpu_mean_max / memory_rate_max. Returns 1 if a measurement cannot be taken.
_gpu_util_wait_total() {
  local s1 s2 s3 cpu_mean m1 m2 m3 mem_used mem_total mem_rate

  # check total cpu usage (mean of three samples)
  while true; do
    s1=$(_gpu_util_cpu_sample)
    sleep 1
    s2=$(_gpu_util_cpu_sample)
    sleep 1
    s3=$(_gpu_util_cpu_sample)
    cpu_mean=$(echo "scale=2; ($s1+$s2+$s3)/3" | bc)
    if [ -z "$cpu_mean" ]; then
      echo "cannot measure total cpu usage (are mpstat and bc installed?)" >&2
      return 1
    fi

    if [ "$(echo "$cpu_mean < $cpu_mean_max" | bc)" = 1 ]; then
      echo "total cpu mean: $cpu_mean is less than $cpu_mean_max, continue to check total memory usage"
      break
    fi
    echo "total cpu mean: $cpu_mean is greater than $cpu_mean_max, sleep 10 seconds"
    sleep 10
  done

  # check total memory usage (mean of three samples of the "used" column)
  while true; do
    m1=$(free -m | awk '/Mem:/ {print $3}')
    sleep 1
    m2=$(free -m | awk '/Mem:/ {print $3}')
    sleep 1
    m3=$(free -m | awk '/Mem:/ {print $3}')
    mem_used=$(echo "scale=2; ($m1+$m2+$m3)/3" | bc)
    mem_total=$(free -m | awk '/Mem:/ {print $2}')
    # Multiply before dividing so bc's scale=2 does not truncate the ratio.
    mem_rate=$(echo "scale=2; $mem_used*100/$mem_total" | bc)
    if [ -z "$mem_rate" ]; then
      echo "cannot measure total memory usage (are free and bc installed?)" >&2
      return 1
    fi

    if [ "$(echo "$mem_rate < $memory_rate_max" | bc)" = 1 ]; then
      echo "total memory rate: $mem_rate is less than $memory_rate_max, continue to check my own cpu and memory usage"
      break
    fi
    echo "total memory rate: $mem_rate is greater than $memory_rate_max, sleep 10 seconds"
    sleep 10
  done
}

# Blocks until the user's own CPU and memory share are below
# cpu_mean_max/constrain_rate and memory_rate_max/constrain_rate.
# The user is $username_mine when set, otherwise $username.
_gpu_util_wait_mine() {
  local user=${username_mine:-$username}
  local rate=$constrain_rate
  local cpu_sum cpu_total cpu_ratio mem_sum mem_total mem_ratio cpu_limit mem_limit

  # A non-numeric or zero rate would break the divisions below.
  if ! [[ $rate =~ ^[0-9]+([.][0-9]+)?$ ]] || [[ $rate =~ ^0+([.]0+)?$ ]]; then
    echo "constrain_rate='$constrain_rate' is not a positive number, using 2" >&2
    rate=2
  fi

  # so my ratio should be less than 1/$rate of the given threshold
  cpu_limit=$(echo "$cpu_mean_max/$rate" | bc)
  mem_limit=$(echo "$memory_rate_max/$rate" | bc)

  while true; do
    # Sum of %cpu over the user's processes, as a share of nproc * 100.
    cpu_sum=$(ps -u "$user" -o %cpu | awk '{sum+=$1} END {print sum+0}')
    cpu_total=$(($(nproc) * 100))
    cpu_ratio=$(echo "scale=2; $cpu_sum*100/$cpu_total" | bc)

    # Sum of rss over the user's processes (divided by 1024) vs. total from free -m.
    mem_sum=$(ps -u "$user" -o rss | awk '{sum+=$1} END {printf "%.2f\n", sum/1024}')
    mem_total=$(free -m | awk '/Mem:/ {print $2}')
    mem_ratio=$(echo "scale=2; $mem_sum*100/$mem_total" | bc)

    if [ "$(echo "$cpu_ratio < $cpu_limit" | bc)" = 1 ] && [ "$(echo "$mem_ratio < $mem_limit" | bc)" = 1 ]; then
      echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is less than half of the given threshold for cpu: $cpu_limit and memory: $mem_limit, ready to take off"
      break
    fi
    echo "my cpu usage: $cpu_ratio, memory usage: $mem_ratio is greater than half of the given threshold, sleep 10 seconds"
    sleep 10
  done
}

# Advances current_device_idx round-robin until it points at a GPU whose used
# memory is below total_gpu_memory - max_gpu_memory_gap and whose utilization
# is below max_gpu_utilization. Returns 1 when there are no devices or when
# nvidia-smi gives no usable numbers for a whole rotation.
_gpu_util_pick_gpu() {
  local n_devices=${#available_devices[@]}
  local busy_above=$((total_gpu_memory - max_gpu_memory_gap))
  local unreadable=0 gpu mem_used gpu_util

  if [ "$n_devices" -eq 0 ]; then
    echo "available_devices is empty, nothing to pick from" >&2
    return 1
  fi

  while true; do
    current_device_idx=$((current_device_idx + 1))
    if [ "$current_device_idx" -ge "$n_devices" ]; then
      # reset
      current_device_idx=0
    fi
    gpu=${available_devices[$current_device_idx]}

    mem_used=$(nvidia-smi --query-gpu=memory.used --format=csv,noheader,nounits -i "$gpu")
    gpu_util=$(nvidia-smi --query-gpu=utilization.gpu --format=csv,noheader,nounits -i "$gpu")
    mem_used=${mem_used//[[:space:]]/}
    gpu_util=${gpu_util//[[:space:]]/}

    # Never hand out a GPU whose state could not be read.
    if ! [[ $mem_used =~ ^[0-9]+$ && $gpu_util =~ ^[0-9]+$ ]]; then
      echo "cannot read the status of device $gpu from nvidia-smi" >&2
      unreadable=$((unreadable + 1))
      if [ "$unreadable" -ge "$n_devices" ]; then
        echo "nvidia-smi returned no usable data for any device, giving up" >&2
        return 1
      fi
      continue
    fi
    unreadable=0

    if [ "$mem_used" -ge "$busy_above" ] || [ "$gpu_util" -ge "$max_gpu_utilization" ]; then
      echo "device $gpu is fully booked, try next one"
      sleep 3
      continue
    fi
    return 0
  done
}

# Waits for the enabled resource limits, then selects the next non-busy GPU.
# Globals read:    constrain_total, constrain_mine, constrain_rate, username,
#                  username_mine (optional), cpu_mean_max, memory_rate_max,
#                  available_devices, total_gpu_memory, max_gpu_memory_gap,
#                  max_gpu_utilization
# Globals written: current_device_idx, device
# Returns:         0 once a device was chosen, 1 if a measurement or the GPU
#                  query could not be carried out
update_device_idx() {
  if _gpu_util_enabled "$constrain_total"; then
    _gpu_util_wait_total || return 1
  fi

  if _gpu_util_enabled "$constrain_mine"; then
    _gpu_util_wait_mine
  fi

  # so all the conditions are satisfied, we can update the device idx and run the next experiment
  _gpu_util_pick_gpu || return 1

  echo "current device: ${available_devices[$current_device_idx]}"
  device=${available_devices[$current_device_idx]}
}
|
res/20230615-17h56m22s/script.sh
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Sweep driver: runs single_user.py once per (poison_method, dataset_idx) pair,
# calling update_device_idx (from gpu_utility.sh) before every run.
# NOTE(review): the "#####" lines look like template delimiters; they are kept
# verbatim and at column 0.

# Work from the directory that holds this script so the relative paths below
# resolve; stop early if that directory or the helper file is unavailable.
cd "$(cd "$(dirname "$0")" && pwd)" || exit 1
source gpu_utility.sh || exit 1

##### setup
export CUDA_VISIBLE_DEVICES=2
source activate /data/yixin/anaconda/mib
exp_name="single_user"
#####

##### loop
for poison_method in char_basic word_basic sent_basic; do
  for dataset_idx in 0 1 2; do
#####

    # NOTE(review): update_device_idx leaves its pick in $device, but the run
    # below still relies on the fixed CUDA_VISIBLE_DEVICES exported above;
    # confirm which of the two is intended.
    update_device_idx

    python single_user.py --dataset_idx "$dataset_idx" --trigger_size 1 --target 0 --loc 0 --batch_size 16 --num_epochs 2 --poison_method "$poison_method" --lr 5e-5 --pattern 0 --exp_name "$exp_name" --log_wb

    # sleeptime is defined in gpu_utility.sh.
    sleep "$sleeptime"

#####

#####
  done
done
|