Handling mismatched tensor sizes
Browse files
animatediff/pipelines/pipeline_animation.py
CHANGED
|
@@ -307,43 +307,48 @@ class AnimationPipeline(DiffusionPipeline):
|
|
| 307 |
else:
|
| 308 |
init_latents = None
|
| 309 |
|
| 310 |
-
|
| 311 |
if isinstance(generator, list) and len(generator) != batch_size:
|
| 312 |
raise ValueError(
|
| 313 |
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
| 314 |
f" size of {batch_size}. Make sure the batch size matches the length of the generators."
|
| 315 |
)
|
|
|
|
| 316 |
if latents is None:
|
| 317 |
rand_device = "cpu" if device.type == "mps" else device
|
| 318 |
|
| 319 |
if isinstance(generator, list):
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
for i in range(
|
| 326 |
-
|
| 327 |
-
|
|
|
|
|
|
|
| 328 |
else:
|
| 329 |
-
|
| 330 |
if init_latents is not None:
|
|
|
|
| 331 |
for i in range(video_length):
|
| 332 |
-
|
| 333 |
-
# gradually reduce init alpha along video frames (loosen restriction)
|
| 334 |
-
init_alpha = (video_length - float(i)) / video_length / 30
|
| 335 |
latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
|
|
|
|
|
|
|
|
|
|
| 336 |
else:
|
| 337 |
if latents.shape != shape:
|
| 338 |
raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
|
| 339 |
latents = latents.to(device)
|
| 340 |
|
| 341 |
-
#
|
| 342 |
-
#latents = latents * self.scheduler.init_noise_sigma
|
| 343 |
if init_latents is None:
|
| 344 |
latents = latents * self.scheduler.init_noise_sigma
|
|
|
|
| 345 |
return latents
|
| 346 |
|
|
|
|
| 347 |
@torch.no_grad()
|
| 348 |
def __call__(
|
| 349 |
self,
|
|
|
|
| 307 |
else:
|
| 308 |
init_latents = None
|
| 309 |
|
|
|
|
| 310 |
if isinstance(generator, list) and len(generator) != batch_size:
|
| 311 |
raise ValueError(
|
| 312 |
f"You have passed a list of generators of length {len(generator)}, but requested an effective batch"
|
| 313 |
f" size of {batch_size}. Make sure the batch size matches the length of the generators."
|
| 314 |
)
|
| 315 |
+
|
| 316 |
if latents is None:
|
| 317 |
rand_device = "cpu" if device.type == "mps" else device
|
| 318 |
|
| 319 |
if isinstance(generator, list):
|
| 320 |
+
# Initialize latents as a random tensor
|
| 321 |
+
latents = torch.randn(shape, device=rand_device, dtype=dtype)
|
| 322 |
+
|
| 323 |
+
# If init_latents is not None, blend it into each video frame with a weight that decreases along the frames
|
| 324 |
+
if init_latents is not None:
|
| 325 |
+
for i in range(video_length):
|
| 326 |
+
init_alpha = (video_length - float(i)) / video_length / 30
|
| 327 |
+
latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
|
| 328 |
+
|
| 329 |
+
latents = latents.to(device)
|
| 330 |
else:
|
| 331 |
+
# If init_latents is not None, repeat it for the entire batch
|
| 332 |
if init_latents is not None:
|
| 333 |
+
init_latents = init_latents.unsqueeze(0).repeat(batch_size, 1, 1, 1, 1)
|
| 334 |
for i in range(video_length):
|
| 335 |
+
init_alpha = (video_length - float(i)) / video_length / 30
|
|
|
|
|
|
|
| 336 |
latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
|
| 337 |
+
else:
|
| 338 |
+
latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
|
| 339 |
+
|
| 340 |
else:
|
| 341 |
if latents.shape != shape:
|
| 342 |
raise ValueError(f"Unexpected latents shape, got {latents.shape}, expected {shape}")
|
| 343 |
latents = latents.to(device)
|
| 344 |
|
| 345 |
+
# Scale the initial noise by the standard deviation required by the scheduler
|
|
|
|
| 346 |
if init_latents is None:
|
| 347 |
latents = latents * self.scheduler.init_noise_sigma
|
| 348 |
+
|
| 349 |
return latents
|
| 350 |
|
| 351 |
+
|
| 352 |
@torch.no_grad()
|
| 353 |
def __call__(
|
| 354 |
self,
|