Spaces:
Runtime error
Runtime error
Yixin Liu
committed on
Commit
·
c4ef4f9
1
Parent(s):
23b9bfb
update readme
Browse files
Procfile
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
web: sh setup.sh && streamlit run main.py
|
main.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# import imp
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import numpy as np
|
| 5 |
+
import time
|
| 6 |
+
# import matplotlib.pyplot as plt
|
| 7 |
+
# import seaborn as sns
|
| 8 |
+
# import plotly.figure_factory as ff
|
| 9 |
+
# import altair as alt
|
| 10 |
+
# from PIL import Image
|
| 11 |
+
# import base64
|
| 12 |
+
# import tarfile
|
| 13 |
+
# import os
|
| 14 |
+
# import requests
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# title
|
| 19 |
+
st.title("Exp Command Generator")
|
| 20 |
+
|
| 21 |
+
## 检查框
|
| 22 |
+
debug = st.checkbox("Debug:选择则会串行地执行命令", value=True)
|
| 23 |
+
# st.write(f"checkbox的值是{res}")
|
| 24 |
+
|
| 25 |
+
setup = st.text_area("Some setup of env at beginning.", """cd $(dirname $(dirname $0))
|
| 26 |
+
source activate xai
|
| 27 |
+
export PYTHONPATH=${PYTHONPATH}:/Users/apple/Desktop/workspace/research_project/attention:/mnt/yixin/:/home/yila22/prj""")
|
| 28 |
+
|
| 29 |
+
exp_hyper = st.text_area("Hyperparameters", """exp_name="debug-adv-training-emotion"
|
| 30 |
+
dataset=emotion
|
| 31 |
+
n_epoch=3
|
| 32 |
+
K=3
|
| 33 |
+
encoder=bert
|
| 34 |
+
lambda_1=1
|
| 35 |
+
lambda_2=1
|
| 36 |
+
x_pgd_radius=0.01
|
| 37 |
+
pgd_radius=0.001
|
| 38 |
+
seed=2
|
| 39 |
+
bsize=8
|
| 40 |
+
lr=5e-5""")
|
| 41 |
+
|
| 42 |
+
## gpu 相关参数
|
| 43 |
+
gpu_list = st.multiselect("multi select", range(10), [1, 2, 3, 4, 5, 6, 7, 8, 9])
|
| 44 |
+
print(gpu_list)
|
| 45 |
+
allow_gpu_memory_threshold = st.number_input("最小单卡剩余容量", value=5000, min_value=0, max_value=30000, step=1000)
|
| 46 |
+
gpu_threshold = st.number_input("最大单卡利用率", value=70, min_value=0, max_value=100, step=10)
|
| 47 |
+
sleep_time_after_loading_task= st.number_input("加载任务后等待秒数", value=20, min_value=0,step=5)
|
| 48 |
+
all_full_sleep_time = st.number_input("全满之后等待秒数", value=20, min_value=0,step=5)
|
| 49 |
+
|
| 50 |
+
gpu_list_str = ' '.join([str(i) for i in gpu_list])
|
| 51 |
+
gpu_hyper = f"gpu=({gpu_list_str})\n"
|
| 52 |
+
gpu_hyper+=f"allow_gpu_memory_threshold={allow_gpu_memory_threshold}\n"
|
| 53 |
+
gpu_hyper+=f"gpu_threshold={gpu_threshold}\n"
|
| 54 |
+
gpu_hyper+=f"sleep_time_after_loading_task={sleep_time_after_loading_task}s\n"
|
| 55 |
+
gpu_hyper+=f"all_full_sleep_time={all_full_sleep_time}s\n"
|
| 56 |
+
gpu_hyper+=f"gpunum={len(gpu_list)}\n"
|
| 57 |
+
|
| 58 |
+
main_loop = st.text_area("Main loop", """for lambda_1 in 1 3;do
|
| 59 |
+
for lambda_2 in 1 10;do
|
| 60 |
+
for n_epoch in 3;do
|
| 61 |
+
for x_pgd_radius in 0.005 0.01;do
|
| 62 |
+
for pgd_radius in 0.0005 0.001 0.002;do
|
| 63 |
+
python train.py --dataset $dataset --data_dir . --output_dir ./outputs/ --attention tanh \
|
| 64 |
+
--encoder $encoder \
|
| 65 |
+
--exp_name $exp_name --lambda_1 $lambda_1 --lambda_2 $lambda_2 --pgd_radius $pgd_radius --x_pgd_radius $x_pgd_radius \
|
| 66 |
+
--K $K --seed $seed --train_mode adv_train --bsize $bsize --n_epoch $n_epoch --lr $lr \
|
| 67 |
+
--eval_baseline
|
| 68 |
+
done;done;done;done;done;""")
|
| 69 |
+
|
| 70 |
+
hyper_loop = main_loop.split("python")[0]
|
| 71 |
+
print(hyper_loop)
|
| 72 |
+
python_cmd = main_loop.split(";do\n")[-1].split('done;')[0]
|
| 73 |
+
print(python_cmd)
|
| 74 |
+
end_loop = "done;"*hyper_loop.count("\n")
|
| 75 |
+
print(end_loop)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
g = st.button("Generate")
|
| 79 |
+
if g:
|
| 80 |
+
s = ""
|
| 81 |
+
s += setup + "\n\n"
|
| 82 |
+
s += exp_hyper + "\n\n"
|
| 83 |
+
s += gpu_hyper + "\n\n"
|
| 84 |
+
s += hyper_loop + "\n\n"
|
| 85 |
+
s += """
|
| 86 |
+
i=0 # we search from the first gpu
|
| 87 |
+
while true; do
|
| 88 |
+
gpu_id=${gpu[$i]}
|
| 89 |
+
# nvidia-smi --query-gpu=utilization.gpu --format=csv -i 2 | grep -Eo "[0-9]+"
|
| 90 |
+
gpu_u=$(nvidia-smi --query-gpu=utilization.gpu --format=csv -i $gpu_id | grep -Eo "[0-9]+")
|
| 91 |
+
free_mem=$(nvidia-smi --query-gpu=memory.free --format=csv -i $gpu_id | grep -Eo "[0-9]+")
|
| 92 |
+
if [[ $free_mem -lt $allow_gpu_memory_threshold || $gpu_u -ge ${gpu_threshold} ]]; then
|
| 93 |
+
i=`expr $i + 1`
|
| 94 |
+
i=`expr $i % $gpunum`
|
| 95 |
+
echo "gpu id ${gpu[$i]} is full loaded, skip"
|
| 96 |
+
if [ "$i" == "0" ]; then
|
| 97 |
+
sleep ${all_full_sleep_time}
|
| 98 |
+
echo "all the gpus are full, sleep 1m"
|
| 99 |
+
fi
|
| 100 |
+
else
|
| 101 |
+
break
|
| 102 |
+
fi
|
| 103 |
+
done
|
| 104 |
+
|
| 105 |
+
gpu_id=${gpu[$i]}
|
| 106 |
+
free_mem=$(nvidia-smi --query-gpu=memory.free --format=csv -i $gpu_id | grep -Eo "[0-9]+")
|
| 107 |
+
gpu_u=$(nvidia-smi --query-gpu=utilization.gpu --format=csv -i $gpu_id | grep -Eo "[0-9]+")
|
| 108 |
+
export CUDA_VISIBLE_DEVICES=$gpu_id
|
| 109 |
+
echo "use gpu id is ${gpu[$i]}, free memory is $free_mem, it utilization is ${gpu_u}%"
|
| 110 |
+
"""
|
| 111 |
+
s += f"""com="{python_cmd}"\n"""
|
| 112 |
+
s += "echo $com\n"
|
| 113 |
+
s += "echo ==========================================================================================\n"
|
| 114 |
+
if debug:
|
| 115 |
+
s += "$com\n"
|
| 116 |
+
else:
|
| 117 |
+
s += "mkdir -p ./logs/\n"
|
| 118 |
+
s += "nohup $com > ./logs/$exp_name-$RANDOM.log 2>&1 &\n"
|
| 119 |
+
s += """echo "sleep for $sleep_time_after_loading_task to wait the task loaded"
|
| 120 |
+
sleep $sleep_time_after_loading_task\n"""
|
| 121 |
+
s += end_loop
|
| 122 |
+
st.success("Finished")
|
| 123 |
+
st.code(s, language="shell")
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
numpy
|
| 2 |
+
streamlit
|
| 3 |
+
pandas
|
setup.sh
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Configure Streamlit for Heroku-style deployment: register an email so the
# first-run prompt is skipped, and run a headless server on the platform port.
mkdir -p ~/.streamlit/

# Use printf instead of `echo "...\n..."`: whether echo interprets backslash
# escapes is shell-dependent (dash does, bash's builtin does not without -e),
# so the old form could write literal "\n" into the TOML files.
printf '%s\n' \
  '[general]' \
  'email = "yila22@lehigh.edu"' \
  > ~/.streamlit/credentials.toml

printf '%s\n' \
  '[server]' \
  'headless = true' \
  'enableCORS=false' \
  "port = $PORT" \
  > ~/.streamlit/config.toml