Commit ea3ddc8 (unverified), committed by shaw
Parents: c599c74 a0c4308

Merge pull request #2 from ashawkey/main
Files changed (6):

1. main_nerf.py → main.py (+12 -6)
2. nerf/renderer.py (+9 -3)
3. nerf/sd.py (+5 -3)
4. readme.md (+21 -10)
5. requirements.txt (+5 -2)
6. scripts/run.sh (+3 -3)
main_nerf.py → main.py RENAMED
```diff
@@ -16,6 +16,8 @@ if __name__ == '__main__':
     parser.add_argument('-O', action='store_true', help="equals --fp16 --cuda_ray --dir_text")
     parser.add_argument('-O2', action='store_true', help="equals --fp16 --dir_text")
     parser.add_argument('--test', action='store_true', help="test mode")
+    parser.add_argument('--save_mesh', action='store_true', help="export an obj mesh with texture")
+    parser.add_argument('--eval_interval', type=int, default=10, help="evaluate on the valid set every interval epochs")
     parser.add_argument('--workspace', type=str, default='workspace')
     parser.add_argument('--guidance', type=str, default='stable-diffusion', help='choose from [stable-diffusion, clip]')
     parser.add_argument('--seed', type=int, default=0)
@@ -26,7 +28,7 @@ if __name__ == '__main__':
     parser.add_argument('--ckpt', type=str, default='latest')
     parser.add_argument('--cuda_ray', action='store_true', help="use CUDA raymarching instead of pytorch")
     parser.add_argument('--max_steps', type=int, default=1024, help="max num steps sampled per ray (only valid when using --cuda_ray)")
-    parser.add_argument('--num_steps', type=int, default=256, help="num steps sampled per ray (only valid when not using --cuda_ray)")
+    parser.add_argument('--num_steps', type=int, default=128, help="num steps sampled per ray (only valid when not using --cuda_ray)")
     parser.add_argument('--upsample_steps', type=int, default=0, help="num steps up-sampled per ray (only valid when not using --cuda_ray)")
     parser.add_argument('--update_extra_interval', type=int, default=16, help="iter interval to update extra status (only valid when using --cuda_ray)")
     parser.add_argument('--max_ray_batch', type=int, default=4096, help="batch size of rays at inference to avoid OOM (only valid when not using --cuda_ray)")
@@ -37,7 +39,7 @@ if __name__ == '__main__':
     # network backbone
     parser.add_argument('--fp16', action='store_true', help="use amp mixed precision training")
     parser.add_argument('--backbone', type=str, default='grid', help="nerf backbone, choose from [grid, tcnn, vanilla]")
-    # rendering resolution in training
+    # rendering resolution in training, decrease this if CUDA OOM.
     parser.add_argument('--w', type=int, default=128, help="render width for NeRF in training")
     parser.add_argument('--h', type=int, default=128, help="render height for NeRF in training")
 
@@ -68,8 +70,8 @@ if __name__ == '__main__':
 
     if opt.O:
         opt.fp16 = True
-        opt.cuda_ray = True
         opt.dir_text = True
+        opt.cuda_ray = True
     elif opt.O2:
         opt.fp16 = True
         opt.dir_text = True
@@ -105,7 +107,9 @@ if __name__ == '__main__':
         else:
             test_loader = NeRFDataset(opt, device=device, type='test', H=opt.H, W=opt.W, size=100).dataloader()
             trainer.test(test_loader)
-            # trainer.save_mesh(resolution=256)
+
+            if opt.save_mesh:
+                trainer.save_mesh(resolution=256)
 
     else:
 
@@ -126,7 +130,7 @@ if __name__ == '__main__':
         # decay to 0.01 * init_lr at last iter step
         scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR(optimizer, lambda iter: 0.01 ** min(iter / opt.iters, 1))
 
-        trainer = Trainer('ngp', opt, model, guidance, device=device, workspace=opt.workspace, optimizer=optimizer, ema_decay=0.95, fp16=opt.fp16, lr_scheduler=scheduler, use_checkpoint=opt.ckpt, eval_interval=1)
+        trainer = Trainer('ngp', opt, model, guidance, device=device, workspace=opt.workspace, optimizer=optimizer, ema_decay=0.95, fp16=opt.fp16, lr_scheduler=scheduler, use_checkpoint=opt.ckpt, eval_interval=opt.eval_interval)
 
         if opt.gui:
             trainer.train_loader = train_loader # attach dataloader to trainer
@@ -143,4 +147,6 @@ if __name__ == '__main__':
             # also test
             test_loader = NeRFDataset(opt, device=device, type='test', H=opt.H, W=opt.W, size=100).dataloader()
             trainer.test(test_loader)
-            trainer.save_mesh(resolution=256)
+
+            if opt.save_mesh:
+                trainer.save_mesh(resolution=256)
```
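For context on the hunk above: the `scheduler` lambda decays the LR multiplier from 1.0 to 0.01 over `opt.iters` steps, so the last step runs at 1% of the initial learning rate. A minimal runnable sketch (the `init_lr` and `iters` values here are hypothetical, not taken from this commit):

```python
import torch
from torch import optim

init_lr, iters = 1e-3, 10_000  # hypothetical values; main.py reads these from argparse
params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = optim.Adam(params, lr=init_lr)
# multiplier 0.01 ** (it / iters) decays smoothly from 1.0 down to 0.01
scheduler = optim.lr_scheduler.LambdaLR(optimizer, lambda it: 0.01 ** min(it / iters, 1))

for _ in range(iters):
    optimizer.step()
    scheduler.step()

print(optimizer.param_groups[0]['lr'])  # ~1e-5, i.e. 0.01 * init_lr
```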
nerf/renderer.py CHANGED
```diff
@@ -168,7 +168,7 @@ class NeRFRenderer(nn.Module):
         from sklearn.neighbors import NearestNeighbors
         from scipy.ndimage import binary_dilation, binary_erosion
 
-        glctx = dr.RasterizeGLContext()
+        glctx = dr.RasterizeCudaContext()
 
         atlas = xatlas.Atlas()
         atlas.add_mesh(v_np, f_np)
@@ -271,7 +271,7 @@ class NeRFRenderer(nn.Module):
 
         print(f'[INFO] writing obj mesh to {obj_file}')
         with open(obj_file, "w") as fp:
-            fp.write(f'mtllib {name}.mtl \n')
+            fp.write(f'mtllib {name}mesh.mtl \n')
 
         print(f'[INFO] writing vertices {v_np.shape}')
         for v in v_np:
@@ -320,6 +320,12 @@ class NeRFRenderer(nn.Module):
         nears.unsqueeze_(-1)
         fars.unsqueeze_(-1)
 
+        # random sample light_d if not provided
+        if light_d is None:
+            # gaussian noise around the ray origin, so the light always face the view dir (avoid dark face)
+            light_d = (rays_o[0] + torch.randn(3, device=device, dtype=torch.float))
+            light_d = safe_normalize(light_d)
+
         #print(f'nears = {nears.min().item()} ~ {nears.max().item()}, fars = {fars.min().item()} ~ {fars.max().item()}')
 
         z_vals = torch.linspace(0.0, 1.0, num_steps, device=device).unsqueeze(0) # [1, T]
@@ -451,7 +457,7 @@ class NeRFRenderer(nn.Module):
         # random sample light_d if not provided
         if light_d is None:
             # gaussian noise around the ray origin, so the light always face the view dir (avoid dark face)
-            light_d = - (rays_o[0] + torch.randn(3, device=device, dtype=torch.float))
+            light_d = (rays_o[0] + torch.randn(3, device=device, dtype=torch.float))
             light_d = safe_normalize(light_d)
 
         results = {}
```
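A note on the `light_d` hunks above: dropping the leading minus sign flips the sampled light direction to the camera's side of the scene, and the same sampling now also runs in the non-CUDA rendering path. A standalone sketch (the `safe_normalize` body is an assumed reimplementation of the repo's helper, and `rays_o` is a placeholder):

```python
import torch

def safe_normalize(x, eps=1e-20):
    # assumed implementation of the repo's helper: divide by a clamped norm
    return x / torch.sqrt(torch.clamp(torch.sum(x * x, -1, keepdim=True), min=eps))

rays_o = torch.randn(4096, 3)  # placeholder ray origins; the real ones come from the camera pose
# jitter the first ray origin with gaussian noise so the light roughly faces
# the view direction (avoids a dark face), then normalize to a unit direction
light_d = safe_normalize(rays_o[0] + torch.randn(3, dtype=torch.float))
```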
nerf/sd.py CHANGED
```diff
@@ -41,6 +41,7 @@ class StableDiffusion(nn.Module):
 
         # 4. Create a scheduler for inference
         self.scheduler = PNDMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", num_train_timesteps=self.num_train_timesteps)
+        self.alphas = self.scheduler.alphas_cumprod.to(self.device) # for convenience
 
         print(f'[INFO] loaded stable diffusion!')
 
@@ -93,8 +94,9 @@ class StableDiffusion(nn.Module):
         noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
         noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
 
-        # w(t), one_minus_alpha_prod, i.e., sigma^2
-        w = (1 - self.scheduler.alphas_cumprod[t]).to(self.device)
+        # w(t), alpha_t * sigma_t^2
+        # w = (1 - self.alphas[t])
+        w = self.alphas[t] ** 0.5 * (1 - self.alphas[t])
         grad = w * (noise_pred - noise)
 
         # clip grad for stable training?
@@ -105,7 +107,7 @@ class StableDiffusion(nn.Module):
         latents.backward(gradient=grad, retain_graph=True)
         # torch.cuda.synchronize(); print(f'[TIME] guiding: backward {time.time() - _t:.4f}s')
 
-        return 0 # fake loss value
+        return 0 # dummy loss value
 
     def produce_latents(self, text_embeddings, height=512, width=512, num_inference_steps=50, guidance_scale=7.5, latents=None):
```
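To make the weighting change above concrete: `train_step` injects the score-distillation gradient `w(t) * (noise_pred - noise)` directly via `latents.backward(gradient=...)` instead of computing a differentiable loss, which is why it can return a dummy value. A toy sketch with placeholder tensors (the `alphas` values and shapes are illustrative, not the real scheduler outputs):

```python
import torch

alphas = torch.linspace(0.9991, 0.0047, 1000)  # stand-in for scheduler.alphas_cumprod
t = torch.randint(0, 1000, (1,))

latents = torch.randn(1, 4, 64, 64, requires_grad=True)  # encoded NeRF render (toy)
noise = torch.randn_like(latents)
noise_pred = torch.randn_like(latents)  # placeholder for the UNet's CFG-combined output

w = alphas[t] ** 0.5 * (1 - alphas[t])  # new weighting: sqrt(alpha_t) * sigma_t^2
grad = w * (noise_pred - noise)
latents.backward(gradient=grad)  # writes grad into latents.grad without differentiating the UNet
```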
readme.md CHANGED
````diff
@@ -4,6 +4,8 @@ A pytorch implementation of the text-to-3D model **Dreamfusion**, powered by the
 
 The original paper's project page: [_DreamFusion: Text-to-3D using 2D Diffusion_](https://dreamfusion3d.github.io/).
 
+Colab notebook for usage: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1MXT3yfOFvO0ooKEfiUUvTKwUkrrlCHpF?usp=sharing)
+
 Examples generated from text prompt `a high quality photo of a pineapple` viewed with the GUI in real time:
 
 https://user-images.githubusercontent.com/25863658/194241493-f3e68f78-aefe-479e-a4a8-001424a61b37.mp4
@@ -37,14 +39,15 @@ cd stable-dreamfusion
 ```bash
 pip install -r requirements.txt
 
+# (optional) install nvdiffrast for exporting textured mesh (--save_mesh)
+pip install git+https://github.com/NVlabs/nvdiffrast/
+
 # (optional) install the tcnn backbone if using --tcnn
 pip install git+https://github.com/NVlabs/tiny-cuda-nn/#subdirectory=bindings/torch
 
 # (optional) install CLIP guidance for the dreamfield setting
 pip install git+https://github.com/openai/CLIP.git
 
-# (optional) install nvdiffrast for exporting textured mesh
-pip install git+https://github.com/NVlabs/nvdiffrast/
 ```
 
 ### Build extension (optional)
@@ -68,19 +71,20 @@ First time running will take some time to compile the CUDA extensions.
 
 ```bash
 ### stable-dreamfusion setting
-# train with text prompt
+## train with text prompt
 # `-O` equals `--cuda_ray --fp16 --dir_text`
-python main_nerf.py --text "a hamburger" --workspace trial -O
+python main.py --text "a hamburger" --workspace trial -O
 
+## after the training is finished:
 # test (exporting 360 video, and an obj mesh with png texture)
-python main_nerf.py --text "a hamburger" --workspace trial -O --test
+python main.py --workspace trial -O --test
 
 # test with a GUI (free view control!)
-python main_nerf.py --text "a hamburger" --workspace trial -O --test --gui
+python main.py --workspace trial -O --test --gui
 
 ### dreamfields (CLIP) setting
-python main_nerf.py --text "a hamburger" --workspace trial_clip -O --guidance clip
-python main_nerf.py --text "a hamburger" --workspace trial_clip -O --test --gui --guidance clip
+python main.py --text "a hamburger" --workspace trial_clip -O --guidance clip
+python main.py --text "a hamburger" --workspace trial_clip -O --test --gui --guidance clip
 ```
 
 # Code organization & Advanced tips
@@ -104,13 +108,20 @@ latents.backward(gradient=grad, retain_graph=True)
 * Other regularizations are in `./nerf/utils.py > Trainer > train_step`.
 * The generation seems quite sensitive to regularizations on weights_sum (alphas for each ray). The original opacity loss tends to make NeRF disappear (zero density everywhere), so we use an entropy loss to replace it for now (encourages alpha to be either 0 or 1).
 * NeRF Rendering core function: `./nerf/renderer.py > NeRFRenderer > run_cuda`.
+    * the occupancy grid based training acceleration (instant-ngp like, enabled by `--cuda_ray`) may harm the generation progress, since once a grid cell is marked as empty, rays won't pass it later...
+    * Not using `--cuda_ray` also works now:
+    ```bash
+    # `-O2` equals `--fp16 --dir_text`
+    python main.py --text "a hamburger" --workspace trial -O2 # faster training, but slower rendering
+    ```
+    Training is faster if only sample 128 points uniformly per ray (5h --> 2.5h).
+    More testing is needed...
 * Shading & normal evaluation: `./nerf/network*.py > NeRFNetwork > forward`. Current implementation harms training and is disabled.
-    * use `--albedo_iters 1000` to enable random shading mode after 1000 steps from albedo, lambertian ,and textureless
+    * use `--albedo_iters 1000` to enable random shading mode after 1000 steps from albedo, lambertian, and textureless.
     * light direction: current implementation use a plane light source, instead of a point light source...
 * View-dependent prompting: `./nerf/provider.py > get_view_direction`.
     * ues `--angle_overhead, --angle_front` to set the border. How to better divide front/back/side regions?
 * Network backbone (`./nerf/network*.py`) can be chosen by the `--backbone` option, but `tcnn` and `vanilla` are not well tested.
-    * the occupancy grid based training acceleration (instant-ngp like) may harm the generation progress, since once a grid cell is marked as empty, rays won't pass it later.
 * Spatial density bias (gaussian density blob): `./nerf/network*.py > NeRFNetwork > gaussian`.
 
 # Acknowledgement
````
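Tying the readme note back to the first file: with `--cuda_ray` off, each ray is sampled uniformly in depth, and this commit lowers the `--num_steps` default from 256 to 128 (the claimed 5h to 2.5h speedup). A sketch of that uniform sampling; the `z_vals` linspace matches the renderer diff, while the `nears`/`fars` values and the mapping into each ray's interval are illustrative assumptions:

```python
import torch

num_steps = 128                  # new --num_steps default in this commit
N = 4096                         # hypothetical number of rays
nears = torch.full((N, 1), 0.1)  # placeholder bounds; really from ray-AABB intersection
fars = torch.full((N, 1), 3.0)

z_vals = torch.linspace(0.0, 1.0, num_steps).unsqueeze(0)  # [1, T], as in the renderer
z_vals = nears + (fars - nears) * z_vals                   # [N, T] sample depths per ray
```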
requirements.txt CHANGED
```diff
@@ -10,9 +10,12 @@ tqdm
 matplotlib
 PyMCubes
 rich
-pysdf
 dearpygui
 scipy
+huggingface_hub
 diffusers
 transformers
-xatlas
+xatlas
+scikit-learn
+imageio
+imageio-ffmpeg
```
scripts/run.sh CHANGED
```diff
@@ -1,5 +1,5 @@
 #! /bin/bash
 
-CUDA_VISIBLE_DEVICES=1 python main_nerf.py -O --text "a DSLR photo of cthulhu" --workspace trial_cthulhu
-CUDA_VISIBLE_DEVICES=1 python main_nerf.py -O --text "a DSLR photo of a squirrel" --workspace trial_squirrel
-CUDA_VISIBLE_DEVICES=1 python main_nerf.py -O --text "a DSLR photo of a cat lying on its side batting at a ball of yarn" --workspace trial_cat_lying
+CUDA_VISIBLE_DEVICES=1 python main.py -O --text "a DSLR photo of cthulhu" --workspace trial_cthulhu
+CUDA_VISIBLE_DEVICES=1 python main.py -O --text "a DSLR photo of a squirrel" --workspace trial_squirrel
+CUDA_VISIBLE_DEVICES=1 python main.py -O --text "a DSLR photo of a cat lying on its side batting at a ball of yarn" --workspace trial_cat_lying
```