AnimateDiff-Image-Init

Paused

App Files Files Community

fffiloni committed on Jul 25, 2023

Commit

929d7f2

1 Parent(s): c65aada

Handling mismatch tensor size

Browse files

Files changed (1) hide show

animatediff/pipelines/pipeline_animation.py +20 -15

animatediff/pipelines/pipeline_animation.py CHANGED Viewed

@@ -307,43 +307,48 @@ class AnimationPipeline(DiffusionPipeline):
         else:
             init_latents = None
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
                 f" size of {batch_size}. Make sure the batch size matches the length of the generators."
             )
         if latents is None:
             rand_device = "cpu" if device.type == "mps" else device
             if isinstance(generator, list):
-                shape = shape
-                # shape = (1,) + shape[1:]
-                # ignore init latents for batch model
-                latents = [
-                    torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype)
-                    for i in range(batch_size)
-                ]
-                latents = torch.cat(latents, dim=0).to(device)
             else:
-                latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
                 if init_latents is not None:
                     for i in range(video_length):
-                        # I just feel dividing by 30 yields a stable result, but I don't know why
-                        # gradually reduce init alpha along video frames (loosen restriction)
-                        init_alpha = (video_length - float(i)) / video_length / 30
                         latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
         else:
             if latents.shape != shape:
                 raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
             latents = latents.to(device)
-        # scale the initial noise by the standard deviation required by the scheduler
-        #latents = latents * self.scheduler.init_noise_sigma
         if init_latents is None:
             latents = latents * self.scheduler.init_noise_sigma
         return latents
     @torch.no_grad()
     def __call__(
         self,

         else:
             init_latents = None
         if isinstance(generator, list) and len(generator) != batch_size:
             raise ValueError(
                 f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
                 f" size of {batch_size}. Make sure the batch size matches the length of the generators."
             )
         if latents is None:
             rand_device = "cpu" if device.type == "mps" else device
             if isinstance(generator, list):
+                # Initialize latents as a random tensor
+                latents = torch.randn(shape, device=rand_device, dtype=dtype)
+                # If init_latents is not None, copy the values for each video frame
+                if init_latents is not None:
+                    for i in range(video_length):
+                        init_alpha = (video_length - float(i)) / video_length / 30
+                        latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
+                latents = latents.to(device)
             else:
+                # If init_latents is not None, repeat it for the entire batch
                 if init_latents is not None:
+                    init_latents = init_latents.unsqueeze(0).repeat(batch_size, 1, 1, 1, 1)
                     for i in range(video_length):
+                        init_alpha = (video_length - float(i)) / video_length / 30
                         latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
+                else:
+                    latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
         else:
             if latents.shape != shape:
                 raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
             latents = latents.to(device)
+        # Scale the initial noise by the standard deviation required by the scheduler
         if init_latents is None:
             latents = latents * self.scheduler.init_noise_sigma
         return latents
     @torch.no_grad()
     def __call__(
         self,