Xsmos committed on
Commit
85918f6
·
verified ·
1 Parent(s): 96de58e
Files changed (3) hide show
  1. diffusion.py +2 -2
  2. quantify_results.ipynb +2 -2
  3. tensorboard.ipynb +4 -4
diffusion.py CHANGED
@@ -431,7 +431,7 @@ class DDPM21CM:
431
  raise ValueError(f"len(self.dataloader) % self.config.gradient_accumulation_steps = {len(self.dataloader) % self.config.gradient_accumulation_steps} instead of 0. Make sure len(dataloader)={len(self.dataloader)} is dividable by gradient_accumulation_steps={self.config.gradient_accumulation_steps}.")
432
 
433
  dataloader_end = time()
434
- print(f"cuda:{torch.cuda.current_device()}/{self.config.global_rank} dataloader costs {dataloader_end-dataloader_start:.3f}s")
435
 
436
  del dataset
437
 
@@ -489,7 +489,7 @@ class DDPM21CM:
489
  global_step = 0
490
  for ep in range(self.config.n_epoch):
491
  self.ddpm.train()
492
- pbar_train = tqdm(total=len(self.dataloader), file=sys.stderr)#, disable=True)#, mininterval=self.config.pbar_update_step)#, disable=True)#not self.accelerator.is_local_main_process)
493
  pbar_train.set_description(f"{socket.gethostbyname(socket.gethostname())} cuda:{torch.cuda.current_device()}/{self.config.global_rank} Epoch {ep}")
494
  epoch_start = time()
495
  for i, (x, c) in enumerate(self.dataloader):
 
431
  raise ValueError(f"len(self.dataloader) % self.config.gradient_accumulation_steps = {len(self.dataloader) % self.config.gradient_accumulation_steps} instead of 0. Make sure len(dataloader)={len(self.dataloader)} is dividable by gradient_accumulation_steps={self.config.gradient_accumulation_steps}.")
432
 
433
  dataloader_end = time()
434
+ #print(f"cuda:{torch.cuda.current_device()}/{self.config.global_rank} dataloader costs {dataloader_end-dataloader_start:.3f}s")
435
 
436
  del dataset
437
 
 
489
  global_step = 0
490
  for ep in range(self.config.n_epoch):
491
  self.ddpm.train()
492
+ pbar_train = tqdm(total=len(self.dataloader), file=sys.stderr, disable=True)#, mininterval=self.config.pbar_update_step)#, disable=True)#not self.accelerator.is_local_main_process)
493
  pbar_train.set_description(f"{socket.gethostbyname(socket.gethostname())} cuda:{torch.cuda.current_device()}/{self.config.global_rank} Epoch {ep}")
494
  epoch_start = time()
495
  for i, (x, c) in enumerate(self.dataloader):
quantify_results.ipynb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f5c609710980f1c8798c5f4732afe3f28bce2a24799b0ef5028f1c9fef85a5d
3
- size 15711677
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4feb0d9bf444b9783c9d63200ff956c00b720a75e82a3b25597432ea88122b2a
3
+ size 16041018
tensorboard.ipynb CHANGED
@@ -23,13 +23,13 @@
23
  "data": {
24
  "text/html": [
25
  "\n",
26
- " <iframe id=\"tensorboard-frame-f840e130425733a4\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
27
  " </iframe>\n",
28
  " <script>\n",
29
  " (function() {\n",
30
- " const frame = document.getElementById(\"tensorboard-frame-f840e130425733a4\");\n",
31
  " const url = new URL(\"/\", window.location);\n",
32
- " const port = 33313;\n",
33
  " if (port) {\n",
34
  " url.port = port;\n",
35
  " }\n",
@@ -59,7 +59,7 @@
59
  {
60
  "data": {
61
  "text/html": [
62
- "<a href=\"https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/33313/\">https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/33313/</a>"
63
  ],
64
  "text/plain": [
65
  "<IPython.core.display.HTML object>"
 
23
  "data": {
24
  "text/html": [
25
  "\n",
26
+ " <iframe id=\"tensorboard-frame-8bbb5cb424abc4b5\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
27
  " </iframe>\n",
28
  " <script>\n",
29
  " (function() {\n",
30
+ " const frame = document.getElementById(\"tensorboard-frame-8bbb5cb424abc4b5\");\n",
31
  " const url = new URL(\"/\", window.location);\n",
32
+ " const port = 34693;\n",
33
  " if (port) {\n",
34
  " url.port = port;\n",
35
  " }\n",
 
59
  {
60
  "data": {
61
  "text/html": [
62
+ "<a href=\"https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/34693/\">https://jupyter.nersc.gov/user/binxia/perlmutter-login-node-base/proxy/34693/</a>"
63
  ],
64
  "text/plain": [
65
  "<IPython.core.display.HTML object>"