engrjaved committed on
Commit
b89c2e0
·
1 Parent(s): 3fe3503

offload text_encoder and transformer to CPU

Browse files
Files changed (1) hide show
  1. inference_script.py +4 -1
inference_script.py CHANGED
@@ -405,6 +405,8 @@ def process_video(
405
  video = video.to(pipe.vae.device, dtype=pipe.vae.dtype)
406
  latent_dist = pipe.vae.encode(video).latent_dist
407
  latent = latent_dist.sample() * pipe.vae.config.scaling_factor
 
 
408
 
409
  patch_size_t = pipe.transformer.config.patch_size_t
410
  if patch_size_t is not None:
@@ -471,7 +473,8 @@ def process_video(
471
 
472
  # offload text encoder to CPU
473
  pipe.text_encoder.to("cpu")
474
-
 
475
  # Predict noise
476
  predicted_noise = pipe.transformer(
477
  hidden_states=latent,
 
405
  video = video.to(pipe.vae.device, dtype=pipe.vae.dtype)
406
  latent_dist = pipe.vae.encode(video).latent_dist
407
  latent = latent_dist.sample() * pipe.vae.config.scaling_factor
408
+
409
+ pipe.text_encoder.to("cuda")
410
 
411
  patch_size_t = pipe.transformer.config.patch_size_t
412
  if patch_size_t is not None:
 
473
 
474
  # offload text encoder to CPU
475
  pipe.text_encoder.to("cpu")
476
+
477
+ pipe.transformer.to("cuda")
478
  # Predict noise
479
  predicted_noise = pipe.transformer(
480
  hidden_states=latent,