32353762
Browse files- diffusion.py +3 -0
- perlmutter_diffusion.sbatch +3 -3
- quantify_results.ipynb +2 -2
- tensorboard.ipynb +26 -8
diffusion.py
CHANGED
|
@@ -445,6 +445,9 @@ class DDPM21CM:
|
|
| 445 |
# flip diagonally
|
| 446 |
if getrandbits(1):
|
| 447 |
img[idx] = img[idx].clone().transpose(1,2)
|
|
|
|
|
|
|
|
|
|
| 448 |
return img
|
| 449 |
|
| 450 |
def train(self):
|
|
|
|
| 445 |
# flip diagonally
|
| 446 |
if getrandbits(1):
|
| 447 |
img[idx] = img[idx].clone().transpose(1,2)
|
| 448 |
+
#print(f"transform: img.shape={img.shape}, idx={idx}, flip_xy={flip_xy}, w/ transpose")
|
| 449 |
+
#else:
|
| 450 |
+
#print(f"transform: img.shape={img.shape}, idx={idx}, flip_xy={flip_xy}, w/o transpose")
|
| 451 |
return img
|
| 452 |
|
| 453 |
def train(self):
|
perlmutter_diffusion.sbatch
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
#SBATCH -q regular #shared
|
| 6 |
#SBATCH -N4
|
| 7 |
#SBATCH --gpus-per-node=4
|
| 8 |
-
#SBATCH -t
|
| 9 |
#SBATCH --ntasks-per-node=1
|
| 10 |
#SBATCH -oReport-%j
|
| 11 |
#SBATCH --mail-type=BEGIN,END,FAIL
|
|
@@ -30,7 +30,7 @@ cat $0
|
|
| 30 |
srun python diffusion.py \
|
| 31 |
--num_image 1600 \
|
| 32 |
--batch_size 2 \
|
| 33 |
-
--n_epoch
|
| 34 |
--channel_mult 0.5 1 2 4 4 8 \
|
| 35 |
--num_new_img_per_gpu 4 \
|
| 36 |
--max_num_img_per_gpu 2 \
|
|
@@ -40,6 +40,6 @@ srun python diffusion.py \
|
|
| 40 |
--dropout 0.2 \
|
| 41 |
--lrate 2e-5 \
|
| 42 |
--train "$SCRATCH/LEN128-DIM64-CUB16-Tvir[4, 6]-zeta[10, 250]-0809-123640.h5" \
|
| 43 |
-
#--resume ./outputs/model-N1600-device_count4-node4-epoch19-
|
| 44 |
|
| 45 |
date
|
|
|
|
| 5 |
#SBATCH -q regular #shared
|
| 6 |
#SBATCH -N4
|
| 7 |
#SBATCH --gpus-per-node=4
|
| 8 |
+
#SBATCH -t 16:00:00
|
| 9 |
#SBATCH --ntasks-per-node=1
|
| 10 |
#SBATCH -oReport-%j
|
| 11 |
#SBATCH --mail-type=BEGIN,END,FAIL
|
|
|
|
| 30 |
srun python diffusion.py \
|
| 31 |
--num_image 1600 \
|
| 32 |
--batch_size 2 \
|
| 33 |
+
--n_epoch 20 \
|
| 34 |
--channel_mult 0.5 1 2 4 4 8 \
|
| 35 |
--num_new_img_per_gpu 4 \
|
| 36 |
--max_num_img_per_gpu 2 \
|
|
|
|
| 40 |
--dropout 0.2 \
|
| 41 |
--lrate 2e-5 \
|
| 42 |
--train "$SCRATCH/LEN128-DIM64-CUB16-Tvir[4, 6]-zeta[10, 250]-0809-123640.h5" \
|
| 43 |
+
#--resume ./outputs/model-N1600-device_count4-node4-epoch19-32257242 \
|
| 44 |
|
| 45 |
date
|
quantify_results.ipynb
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6893bbfd60b952c7a56b8eea8506da7df6e47513a4cd301d0980db2e0dafd5cc
|
| 3 |
+
size 24436267
|
tensorboard.ipynb
CHANGED
|
@@ -2,12 +2,21 @@
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
-
"execution_count":
|
| 6 |
"id": "ae45e44e-a11c-43ef-b830-c7a58a72f51e",
|
| 7 |
"metadata": {
|
| 8 |
"tags": []
|
| 9 |
},
|
| 10 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
"source": [
|
| 12 |
"import nersc_tensorboard_helper\n",
|
| 13 |
"%load_ext tensorboard"
|
|
@@ -15,21 +24,30 @@
|
|
| 15 |
},
|
| 16 |
{
|
| 17 |
"cell_type": "code",
|
| 18 |
-
"execution_count":
|
| 19 |
"id": "a5c088b8-5051-402f-b4ec-2b684ad5a952",
|
| 20 |
"metadata": {},
|
| 21 |
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
{
|
| 23 |
"data": {
|
| 24 |
"text/html": [
|
| 25 |
"\n",
|
| 26 |
-
" <iframe id=\"tensorboard-frame-
|
| 27 |
" </iframe>\n",
|
| 28 |
" <script>\n",
|
| 29 |
" (function() {\n",
|
| 30 |
-
" const frame = document.getElementById(\"tensorboard-frame-
|
| 31 |
" const url = new URL(\"/\", window.location);\n",
|
| 32 |
-
" const port =
|
| 33 |
" if (port) {\n",
|
| 34 |
" url.port = port;\n",
|
| 35 |
" }\n",
|
|
@@ -52,14 +70,14 @@
|
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"cell_type": "code",
|
| 55 |
-
"execution_count":
|
| 56 |
"id": "2f76c0a9-2218-4073-86aa-f4f655d7642f",
|
| 57 |
"metadata": {},
|
| 58 |
"outputs": [
|
| 59 |
{
|
| 60 |
"data": {
|
| 61 |
"text/html": [
|
| 62 |
-
"<a href=\"https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/
|
| 63 |
],
|
| 64 |
"text/plain": [
|
| 65 |
"<IPython.core.display.HTML object>"
|
|
|
|
| 2 |
"cells": [
|
| 3 |
{
|
| 4 |
"cell_type": "code",
|
| 5 |
+
"execution_count": 4,
|
| 6 |
"id": "ae45e44e-a11c-43ef-b830-c7a58a72f51e",
|
| 7 |
"metadata": {
|
| 8 |
"tags": []
|
| 9 |
},
|
| 10 |
+
"outputs": [
|
| 11 |
+
{
|
| 12 |
+
"name": "stdout",
|
| 13 |
+
"output_type": "stream",
|
| 14 |
+
"text": [
|
| 15 |
+
"The tensorboard extension is already loaded. To reload it, use:\n",
|
| 16 |
+
" %reload_ext tensorboard\n"
|
| 17 |
+
]
|
| 18 |
+
}
|
| 19 |
+
],
|
| 20 |
"source": [
|
| 21 |
"import nersc_tensorboard_helper\n",
|
| 22 |
"%load_ext tensorboard"
|
|
|
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"cell_type": "code",
|
| 27 |
+
"execution_count": 5,
|
| 28 |
"id": "a5c088b8-5051-402f-b4ec-2b684ad5a952",
|
| 29 |
"metadata": {},
|
| 30 |
"outputs": [
|
| 31 |
+
{
|
| 32 |
+
"data": {
|
| 33 |
+
"text/plain": [
|
| 34 |
+
"Reusing TensorBoard on port 33249 (pid 774528), started 4:01:53 ago. (Use '!kill 774528' to kill it.)"
|
| 35 |
+
]
|
| 36 |
+
},
|
| 37 |
+
"metadata": {},
|
| 38 |
+
"output_type": "display_data"
|
| 39 |
+
},
|
| 40 |
{
|
| 41 |
"data": {
|
| 42 |
"text/html": [
|
| 43 |
"\n",
|
| 44 |
+
" <iframe id=\"tensorboard-frame-a8fadb66b88e501a\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
|
| 45 |
" </iframe>\n",
|
| 46 |
" <script>\n",
|
| 47 |
" (function() {\n",
|
| 48 |
+
" const frame = document.getElementById(\"tensorboard-frame-a8fadb66b88e501a\");\n",
|
| 49 |
" const url = new URL(\"/\", window.location);\n",
|
| 50 |
+
" const port = 33249;\n",
|
| 51 |
" if (port) {\n",
|
| 52 |
" url.port = port;\n",
|
| 53 |
" }\n",
|
|
|
|
| 70 |
},
|
| 71 |
{
|
| 72 |
"cell_type": "code",
|
| 73 |
+
"execution_count": 6,
|
| 74 |
"id": "2f76c0a9-2218-4073-86aa-f4f655d7642f",
|
| 75 |
"metadata": {},
|
| 76 |
"outputs": [
|
| 77 |
{
|
| 78 |
"data": {
|
| 79 |
"text/html": [
|
| 80 |
+
"<a href=\"https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/33249/\">https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/33249/</a>"
|
| 81 |
],
|
| 82 |
"text/plain": [
|
| 83 |
"<IPython.core.display.HTML object>"
|