feat(method): support fp16
Browse files
ImageReward/ImageReward.py
CHANGED
|
@@ -81,6 +81,12 @@ class ImageReward(nn.Module):
|
|
| 81 |
self.mean = 0.16717362830052426
|
| 82 |
self.std = 1.0333394966054072
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
def score_gard(self, prompt_ids, prompt_attention_mask, image):
|
| 85 |
|
| 86 |
image_embeds = self.blip.visual_encoder(image)
|
|
|
|
| 81 |
self.mean = 0.16717362830052426
|
| 82 |
self.std = 1.0333394966054072
|
| 83 |
|
| 84 |
+
def text_tokenizer(self, prompt):
|
| 85 |
+
# Tokenize the prompt: calls self.blip.tokenizer with padding='max_length', truncation=True, max_length=35 and return_tensors="pt", then moves the result to self.device.
|
| 86 |
+
text_input = self.blip.tokenizer(prompt, padding='max_length', truncation=True, max_length=35,
|
| 87 |
+
return_tensors="pt").to(self.device)
|
| 88 |
+
return text_input
|
| 89 |
+
|
| 90 |
def score_gard(self, prompt_ids, prompt_attention_mask, image):
|
| 91 |
|
| 92 |
image_embeds = self.blip.visual_encoder(image)
|
README.md
CHANGED
|
@@ -215,12 +215,18 @@ python svgdreamer.py x=ink "prompt='Big Wild Goose Pagoda. ink style. Minimalist
|
|
| 215 |
|
| 216 |
- I highly recommend turning on xformer `enable_xformers=True` to speed up optimization.
|
| 217 |
- `x.vpsd.t_schedule` greatly affects the style of the result. Please try more.
|
| 218 |
-
- `neg_prompt` negative prompts affect the quality of the results
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
## 📋 TODO
|
| 221 |
|
| 222 |
-
- [x] Release the code
|
| 223 |
-
- [x] Add docker image
|
|
|
|
| 224 |
|
| 225 |
## :books: Acknowledgement
|
| 226 |
|
|
|
|
| 215 |
|
| 216 |
- I highly recommend enabling xFormers (`enable_xformers=True`) to speed up optimization.
|
| 217 |
- `x.vpsd.t_schedule` greatly affects the style of the result, so it is worth experimenting with different schedules.
|
| 218 |
+
- `neg_prompt`: negative prompts affect the quality of the results.
|
| 219 |
+
- By setting `state.mprec='fp16'`, you can significantly reduce GPU memory usage.
|
| 220 |
+
|
| 221 |
+
```shell
|
| 222 |
+
CUDA_VISIBLE_DEVICES=0 python svgdreamer.py x=iconography skip_sive=True "prompt='Sydney opera house. oil painting. by Van Gogh'" result_path='./logs-reward/Sydney-reward' state.mprec='fp16' x.vpsd.phi_ReFL=True multirun=True srange='[10,12]'
|
| 223 |
+
```
|
| 224 |
|
| 225 |
## 📋 TODO
|
| 226 |
|
| 227 |
+
- [x] Release the code.
|
| 228 |
+
- [x] Add docker image.
|
| 229 |
+
- [x] Support fp16 optimization.
|
| 230 |
|
| 231 |
## :books: Acknowledgement
|
| 232 |
|
svgdreamer/painter/VPSD_pipeline.py
CHANGED
|
@@ -24,7 +24,12 @@ from svgdreamer.diffusers_warp import init_StableDiffusion_pipeline, init_diffus
|
|
| 24 |
|
| 25 |
class VectorizedParticleSDSPipeline(torch.nn.Module):
|
| 26 |
|
| 27 |
-
def __init__(self,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
super().__init__()
|
| 29 |
self.device = device
|
| 30 |
assert guidance_cfg.n_particle >= guidance_cfg.vsd_n_particle
|
|
@@ -32,7 +37,7 @@ class VectorizedParticleSDSPipeline(torch.nn.Module):
|
|
| 32 |
|
| 33 |
pipe_kwargs = {
|
| 34 |
"device": self.device,
|
| 35 |
-
"torch_dtype": torch.float32,
|
| 36 |
"local_files_only": not diffuser_cfg.download,
|
| 37 |
"force_download": diffuser_cfg.force_download,
|
| 38 |
"resume_download": diffuser_cfg.resume_download,
|
|
@@ -569,7 +574,7 @@ class VectorizedParticleSDSPipeline(torch.nn.Module):
|
|
| 569 |
noise_pred_est = self.get_noise_map(noise_pred_est, self.guidance_scale_lora, use_cfg=False)
|
| 570 |
|
| 571 |
# w(t), sigma_t^2
|
| 572 |
-
w = (1 - self.alphas[self.t])
|
| 573 |
grad = grad_scale * w * (noise_pred_pretrain - noise_pred_est.detach())
|
| 574 |
grad = torch.nan_to_num(grad)
|
| 575 |
|
|
|
|
| 24 |
|
| 25 |
class VectorizedParticleSDSPipeline(torch.nn.Module):
|
| 26 |
|
| 27 |
+
def __init__(self,
|
| 28 |
+
model_cfg: DictConfig,
|
| 29 |
+
diffuser_cfg: DictConfig,
|
| 30 |
+
guidance_cfg: DictConfig,
|
| 31 |
+
device: torch.device,
|
| 32 |
+
dtype):
|
| 33 |
super().__init__()
|
| 34 |
self.device = device
|
| 35 |
assert guidance_cfg.n_particle >= guidance_cfg.vsd_n_particle
|
|
|
|
| 37 |
|
| 38 |
pipe_kwargs = {
|
| 39 |
"device": self.device,
|
| 40 |
+
"torch_dtype": torch.float16 if dtype == 'fp16' else torch.float32,
|
| 41 |
"local_files_only": not diffuser_cfg.download,
|
| 42 |
"force_download": diffuser_cfg.force_download,
|
| 43 |
"resume_download": diffuser_cfg.resume_download,
|
|
|
|
| 574 |
noise_pred_est = self.get_noise_map(noise_pred_est, self.guidance_scale_lora, use_cfg=False)
|
| 575 |
|
| 576 |
# w(t), sigma_t^2
|
| 577 |
+
w = (1 - self.alphas[self.t]).to(pred_rgb.dtype)
|
| 578 |
grad = grad_scale * w * (noise_pred_pretrain - noise_pred_est.detach())
|
| 579 |
grad = torch.nan_to_num(grad)
|
| 580 |
|
svgdreamer/pipelines/SVGDreamer_pipeline.py
CHANGED
|
@@ -445,7 +445,8 @@ class SVGDreamerPipeline(ModelState):
|
|
| 445 |
path_reinit = self.x_cfg.path_reinit
|
| 446 |
|
| 447 |
# init VPSD
|
| 448 |
-
pipeline = VectorizedParticleSDSPipeline(vpsd_model_cfg, self.args.diffuser, guidance_cfg,
|
|
|
|
| 449 |
# init reward model
|
| 450 |
reward_model = None
|
| 451 |
if guidance_cfg.phi_ReFL:
|
|
@@ -522,7 +523,7 @@ class SVGDreamerPipeline(ModelState):
|
|
| 522 |
self.frame_idx += 1
|
| 523 |
|
| 524 |
L_guide, grad, latents, t_step = pipeline.variational_score_distillation(
|
| 525 |
-
raster_imgs,
|
| 526 |
self.step,
|
| 527 |
prompt=[text_prompt],
|
| 528 |
negative_prompt=self.args.neg_prompt,
|
|
|
|
| 445 |
path_reinit = self.x_cfg.path_reinit
|
| 446 |
|
| 447 |
# init VPSD
|
| 448 |
+
pipeline = VectorizedParticleSDSPipeline(vpsd_model_cfg, self.args.diffuser, guidance_cfg,
|
| 449 |
+
self.device, self.args.state.mprec)
|
| 450 |
# init reward model
|
| 451 |
reward_model = None
|
| 452 |
if guidance_cfg.phi_ReFL:
|
|
|
|
| 523 |
self.frame_idx += 1
|
| 524 |
|
| 525 |
L_guide, grad, latents, t_step = pipeline.variational_score_distillation(
|
| 526 |
+
raster_imgs.to(self.weight_dtype),
|
| 527 |
self.step,
|
| 528 |
prompt=[text_prompt],
|
| 529 |
negative_prompt=self.args.neg_prompt,
|