Xsmos committed on
Commit
534b1b2
·
verified ·
1 Parent(s): 906ffc5
diffusion.py CHANGED
@@ -445,6 +445,9 @@ class DDPM21CM:
445
  # flip diagonally
446
  if getrandbits(1):
447
  img[idx] = img[idx].clone().transpose(1,2)
 
 
 
448
  return img
449
 
450
  def train(self):
 
445
  # flip diagonally
446
  if getrandbits(1):
447
  img[idx] = img[idx].clone().transpose(1,2)
448
+ #print(f"transform: img.shape={img.shape}, idx={idx}, flip_xy={flip_xy}, w/ transpose")
449
+ #else:
450
+ #print(f"transform: img.shape={img.shape}, idx={idx}, flip_xy={flip_xy}, w/o transpose")
451
  return img
452
 
453
  def train(self):
perlmutter_diffusion.sbatch CHANGED
@@ -5,7 +5,7 @@
5
  #SBATCH -q regular #shared
6
  #SBATCH -N4
7
  #SBATCH --gpus-per-node=4
8
- #SBATCH -t 48:00:00
9
  #SBATCH --ntasks-per-node=1
10
  #SBATCH -oReport-%j
11
  #SBATCH --mail-type=BEGIN,END,FAIL
@@ -30,7 +30,7 @@ cat $0
30
  srun python diffusion.py \
31
  --num_image 1600 \
32
  --batch_size 2 \
33
- --n_epoch 60 \
34
  --channel_mult 0.5 1 2 4 4 8 \
35
  --num_new_img_per_gpu 4 \
36
  --max_num_img_per_gpu 2 \
@@ -40,6 +40,6 @@ srun python diffusion.py \
40
  --dropout 0.2 \
41
  --lrate 2e-5 \
42
  --train "$SCRATCH/LEN128-DIM64-CUB16-Tvir[4, 6]-zeta[10, 250]-0809-123640.h5" \
43
- #--resume ./outputs/model-N1600-device_count4-node4-epoch19-32185426 \
44
 
45
  date
 
5
  #SBATCH -q regular #shared
6
  #SBATCH -N4
7
  #SBATCH --gpus-per-node=4
8
+ #SBATCH -t 16:00:00
9
  #SBATCH --ntasks-per-node=1
10
  #SBATCH -oReport-%j
11
  #SBATCH --mail-type=BEGIN,END,FAIL
 
30
  srun python diffusion.py \
31
  --num_image 1600 \
32
  --batch_size 2 \
33
+ --n_epoch 20 \
34
  --channel_mult 0.5 1 2 4 4 8 \
35
  --num_new_img_per_gpu 4 \
36
  --max_num_img_per_gpu 2 \
 
40
  --dropout 0.2 \
41
  --lrate 2e-5 \
42
  --train "$SCRATCH/LEN128-DIM64-CUB16-Tvir[4, 6]-zeta[10, 250]-0809-123640.h5" \
43
+ #--resume ./outputs/model-N1600-device_count4-node4-epoch19-32257242 \
44
 
45
  date
quantify_results.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6691f0135f3bb373506e6090511c5527f23e0b4dc780f031bf80ca6d141e32ca
3
- size 25396988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6893bbfd60b952c7a56b8eea8506da7df6e47513a4cd301d0980db2e0dafd5cc
3
+ size 24436267
tensorboard.ipynb CHANGED
@@ -2,12 +2,21 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "id": "ae45e44e-a11c-43ef-b830-c7a58a72f51e",
7
  "metadata": {
8
  "tags": []
9
  },
10
- "outputs": [],
 
 
 
 
 
 
 
 
 
11
  "source": [
12
  "import nersc_tensorboard_helper\n",
13
  "%load_ext tensorboard"
@@ -15,21 +24,30 @@
15
  },
16
  {
17
  "cell_type": "code",
18
- "execution_count": 2,
19
  "id": "a5c088b8-5051-402f-b4ec-2b684ad5a952",
20
  "metadata": {},
21
  "outputs": [
 
 
 
 
 
 
 
 
 
22
  {
23
  "data": {
24
  "text/html": [
25
  "\n",
26
- " <iframe id=\"tensorboard-frame-b3fe77206bcde3f5\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
27
  " </iframe>\n",
28
  " <script>\n",
29
  " (function() {\n",
30
- " const frame = document.getElementById(\"tensorboard-frame-b3fe77206bcde3f5\");\n",
31
  " const url = new URL(\"/\", window.location);\n",
32
- " const port = 33553;\n",
33
  " if (port) {\n",
34
  " url.port = port;\n",
35
  " }\n",
@@ -52,14 +70,14 @@
52
  },
53
  {
54
  "cell_type": "code",
55
- "execution_count": 3,
56
  "id": "2f76c0a9-2218-4073-86aa-f4f655d7642f",
57
  "metadata": {},
58
  "outputs": [
59
  {
60
  "data": {
61
  "text/html": [
62
- "<a href=\"https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/33553/\">https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/33553/</a>"
63
  ],
64
  "text/plain": [
65
  "<IPython.core.display.HTML object>"
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 4,
6
  "id": "ae45e44e-a11c-43ef-b830-c7a58a72f51e",
7
  "metadata": {
8
  "tags": []
9
  },
10
+ "outputs": [
11
+ {
12
+ "name": "stdout",
13
+ "output_type": "stream",
14
+ "text": [
15
+ "The tensorboard extension is already loaded. To reload it, use:\n",
16
+ " %reload_ext tensorboard\n"
17
+ ]
18
+ }
19
+ ],
20
  "source": [
21
  "import nersc_tensorboard_helper\n",
22
  "%load_ext tensorboard"
 
24
  },
25
  {
26
  "cell_type": "code",
27
+ "execution_count": 5,
28
  "id": "a5c088b8-5051-402f-b4ec-2b684ad5a952",
29
  "metadata": {},
30
  "outputs": [
31
+ {
32
+ "data": {
33
+ "text/plain": [
34
+ "Reusing TensorBoard on port 33249 (pid 774528), started 4:01:53 ago. (Use '!kill 774528' to kill it.)"
35
+ ]
36
+ },
37
+ "metadata": {},
38
+ "output_type": "display_data"
39
+ },
40
  {
41
  "data": {
42
  "text/html": [
43
  "\n",
44
+ " <iframe id=\"tensorboard-frame-a8fadb66b88e501a\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
45
  " </iframe>\n",
46
  " <script>\n",
47
  " (function() {\n",
48
+ " const frame = document.getElementById(\"tensorboard-frame-a8fadb66b88e501a\");\n",
49
  " const url = new URL(\"/\", window.location);\n",
50
+ " const port = 33249;\n",
51
  " if (port) {\n",
52
  " url.port = port;\n",
53
  " }\n",
 
70
  },
71
  {
72
  "cell_type": "code",
73
+ "execution_count": 6,
74
  "id": "2f76c0a9-2218-4073-86aa-f4f655d7642f",
75
  "metadata": {},
76
  "outputs": [
77
  {
78
  "data": {
79
  "text/html": [
80
+ "<a href=\"https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/33249/\">https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/33249/</a>"
81
  ],
82
  "text/plain": [
83
  "<IPython.core.display.HTML object>"