ZeroGPU
demo.py CHANGED
@@ -306,13 +306,12 @@ def generate_latent_image(mask, class_selection, sampling_steps=50):
 
 
 @spaces.GPU
-def decode_images(latents, vae):
+def decode_images(latents):
     """Decode latent representations to pixel space using a VAE.
 
     Args:
         latents: A numpy array of shape [B, C, H, W] for single image
                  or [B, C, T, H, W] for sequences/animations
-        vae: The VAE model for decoding
 
     Returns:
         numpy array of decoded images in [B, H, W, 3] format for single image
@@ -321,6 +320,9 @@ def decode_images(latents, vae):
     if latents is None:
         return None
 
+    vae = vae.to(device, dtype=dtype)
+    vae.eval()
+
     # Convert to torch tensor if needed
     if not isinstance(latents, torch.Tensor):
         latents = torch.from_numpy(latents).to(device, dtype=dtype)
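With `vae` dropped from the signature, `decode_images` now reads the model from module state, the usual pattern for `@spaces.GPU` functions on ZeroGPU Spaces: the model is loaded once at import time and moved onto the GPU inside the decorated call. Note that because the added line rebinds the name `vae`, the function body also needs a `global vae` declaration (unless one already sits in context lines the hunk omits); otherwise Python raises `UnboundLocalError`. A minimal sketch of the assumed setup follows; the `AutoencoderKL` checkpoint is a placeholder, and only `vae`, `device`, `dtype`, and the decorator come from the diff:

import spaces
import torch
from diffusers import AutoencoderKL

# Module-level state shared with the GPU worker; loaded once at import.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float16 if device.type == "cuda" else torch.float32
vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse")  # placeholder checkpoint

@spaces.GPU
def decode_images(latents):
    global vae  # needed because the body rebinds the name below
    if latents is None:
        return None
    vae = vae.to(device, dtype=dtype)
    vae.eval()
    ...  # tensor conversion and VAE decode as in the diff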
@@ -365,7 +367,6 @@ def decode_images(latents, vae):
 
 def decode_latent_to_pixel(latent_image):
     """Decode a single latent image to pixel space"""
-    global vae
     if latent_image is None:
         return None
 
@@ -373,7 +374,7 @@ def decode_latent_to_pixel(latent_image):
     if len(latent_image.shape) == 3:
         latent_image = latent_image[None, ...]
 
-    decoded_image = decode_images(latent_image, vae)
+    decoded_image = decode_images(latent_image)
     decoded_image = cv2.resize(
         decoded_image, (400, 400), interpolation=cv2.INTER_NEAREST
     )
@@ -493,7 +494,6 @@ def generate_animation(
 
 def decode_animation(latent_animation):
     """Decode a latent animation to pixel space"""
-    global vae
    if latent_animation is None:
         return None
 
@@ -506,9 +506,7 @@ def decode_animation(latent_animation):
     latent_animation = latent_animation[None, ...]  # Add batch dimension
 
     # Decode using VAE
-    decoded = decode_images(
-        latent_animation, vae
-    )  # Returns B x C x T x H x W numpy array
+    decoded = decode_images(latent_animation)  # Returns B x C x T x H x W numpy array
 
     # Remove batch dimension and transpose to T x H x W x C
     decoded = np.transpose(decoded[0], (1, 2, 3, 0))  # [T, H, W, C]
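After this change every caller shares the same simplified path: `decode_latent_to_pixel` and `decode_animation` no longer declare `global vae` or thread the model through, they just call `decode_images(...)`. A hypothetical call, assuming a 4-channel 64x64 latent (illustrative; the Space's real latent shape may differ):

import numpy as np

latent = np.random.randn(1, 4, 64, 64).astype(np.float32)  # [B, C, H, W], illustrative shape
pixels = decode_images(latent)  # [B, H, W, 3] per the docstring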