13133235
Browse files- perlmutter_diffusion.sbatch +8 -8
- tensorboard.ipynb +4 -4
perlmutter_diffusion.sbatch
CHANGED
|
@@ -2,10 +2,10 @@
|
|
| 2 |
#SBATCH -A m4717
|
| 3 |
#SBATCH -J diffusion
|
| 4 |
#SBATCH -C gpu&hbm80g
|
| 5 |
-
#SBATCH -q
|
| 6 |
-
#SBATCH -
|
| 7 |
-
#SBATCH --gpus-per-node=
|
| 8 |
-
#SBATCH -t
|
| 9 |
#SBATCH --ntasks-per-node=1
|
| 10 |
#SBATCH -oReport-%j
|
| 11 |
#SBATCH --mail-type=BEGIN,END,FAIL
|
|
@@ -40,12 +40,12 @@ srun python diffusion.py \
|
|
| 40 |
--batch_size 2 \
|
| 41 |
--n_epoch 80 \
|
| 42 |
--channel_mult 0.5 1 2 4 4 8 \
|
| 43 |
-
--num_new_img_per_gpu
|
| 44 |
-
--max_num_img_per_gpu
|
| 45 |
--gradient_accumulation_steps 1 \
|
| 46 |
--autocast 1 \
|
| 47 |
--use_checkpoint 1 \
|
| 48 |
-
--
|
| 49 |
-
#--
|
| 50 |
|
| 51 |
date
|
|
|
|
| 2 |
#SBATCH -A m4717
|
| 3 |
#SBATCH -J diffusion
|
| 4 |
#SBATCH -C gpu&hbm80g
|
| 5 |
+
#SBATCH -q shared #regular
|
| 6 |
+
#SBATCH -N1
|
| 7 |
+
#SBATCH --gpus-per-node=1
|
| 8 |
+
#SBATCH -t 6:00:00
|
| 9 |
#SBATCH --ntasks-per-node=1
|
| 10 |
#SBATCH -oReport-%j
|
| 11 |
#SBATCH --mail-type=BEGIN,END,FAIL
|
|
|
|
| 40 |
--batch_size 2 \
|
| 41 |
--n_epoch 80 \
|
| 42 |
--channel_mult 0.5 1 2 4 4 8 \
|
| 43 |
+
--num_new_img_per_gpu 9 \
|
| 44 |
+
--max_num_img_per_gpu 3 \
|
| 45 |
--gradient_accumulation_steps 1 \
|
| 46 |
--autocast 1 \
|
| 47 |
--use_checkpoint 1 \
|
| 48 |
+
--resume ./outputs/model-N1280-device_count4-node5-epoch34-13133235 \
|
| 49 |
+
#--train "$SCRATCH/LEN128-DIM64-CUB16-Tvir[4, 6]-zeta[10, 250]-0809-123640.h5" \
|
| 50 |
|
| 51 |
date
|
tensorboard.ipynb
CHANGED
|
@@ -23,13 +23,13 @@
|
|
| 23 |
"data": {
|
| 24 |
"text/html": [
|
| 25 |
"\n",
|
| 26 |
-
" <iframe id=\"tensorboard-frame-
|
| 27 |
" </iframe>\n",
|
| 28 |
" <script>\n",
|
| 29 |
" (function() {\n",
|
| 30 |
-
" const frame = document.getElementById(\"tensorboard-frame-
|
| 31 |
" const url = new URL(\"/\", window.location);\n",
|
| 32 |
-
" const port =
|
| 33 |
" if (port) {\n",
|
| 34 |
" url.port = port;\n",
|
| 35 |
" }\n",
|
|
@@ -59,7 +59,7 @@
|
|
| 59 |
{
|
| 60 |
"data": {
|
| 61 |
"text/html": [
|
| 62 |
-
"<a href=\"https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/
|
| 63 |
],
|
| 64 |
"text/plain": [
|
| 65 |
"<IPython.core.display.HTML object>"
|
|
|
|
| 23 |
"data": {
|
| 24 |
"text/html": [
|
| 25 |
"\n",
|
| 26 |
+
" <iframe id=\"tensorboard-frame-c11f24d3c7445b04\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
|
| 27 |
" </iframe>\n",
|
| 28 |
" <script>\n",
|
| 29 |
" (function() {\n",
|
| 30 |
+
" const frame = document.getElementById(\"tensorboard-frame-c11f24d3c7445b04\");\n",
|
| 31 |
" const url = new URL(\"/\", window.location);\n",
|
| 32 |
+
" const port = 46861;\n",
|
| 33 |
" if (port) {\n",
|
| 34 |
" url.port = port;\n",
|
| 35 |
" }\n",
|
|
|
|
| 59 |
{
|
| 60 |
"data": {
|
| 61 |
"text/html": [
|
| 62 |
+
"<a href=\"https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/46861/\">https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/46861/</a>"
|
| 63 |
],
|
| 64 |
"text/plain": [
|
| 65 |
"<IPython.core.display.HTML object>"
|