Update animatediff/pipelines/pipeline_animation.py
Browse files
animatediff/pipelines/pipeline_animation.py
CHANGED
|
@@ -317,25 +317,28 @@ class AnimationPipeline(DiffusionPipeline):
|
|
| 317 |
rand_device = "cpu" if device.type == "mps" else device
|
| 318 |
|
| 319 |
if isinstance(generator, list):
|
| 320 |
-
|
| 321 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 322 |
|
| 323 |
-
# If init_latents is not None, copy the values for each video frame
|
| 324 |
-
if init_latents is not None:
|
| 325 |
-
for i in range(video_length):
|
| 326 |
-
init_alpha = (video_length - float(i)) / video_length / 30
|
| 327 |
-
latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
|
| 328 |
|
| 329 |
-
latents = latents.to(device)
|
| 330 |
else:
|
| 331 |
latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
|
| 332 |
-
|
| 333 |
-
# If init_latents is not None, repeat it for the entire batch
|
| 334 |
if init_latents is not None:
|
| 335 |
-
|
| 336 |
for i in range(video_length):
|
| 337 |
-
|
| 338 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
|
| 340 |
else:
|
| 341 |
if latents.shape != shape:
|
|
|
|
| 317 |
rand_device = "cpu" if device.type == "mps" else device
|
| 318 |
|
| 319 |
if isinstance(generator, list):
|
| 320 |
+
shape = shape
|
| 321 |
+
# shape = (1,) + shape[1:]
|
| 322 |
+
# ignore init latents for batch model
|
| 323 |
+
latents = [
|
| 324 |
+
torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype)
|
| 325 |
+
for i in range(batch_size)
|
| 326 |
+
]
|
| 327 |
+
latents = torch.cat(latents, dim=0).to(device)
|
| 328 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 329 |
|
|
|
|
| 330 |
else:
|
| 331 |
latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
|
|
|
|
|
|
|
| 332 |
if init_latents is not None:
|
| 333 |
+
|
| 334 |
for i in range(video_length):
|
| 335 |
+
# Dividing by 30 empirically yields stable results, but I don't know why
|
| 336 |
+
# gradually reduce init alpha along video frames (loosen restriction)
|
| 337 |
+
init_alpha = (video_length - float(i)) / video_length / 30
|
| 338 |
+
latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
|
| 342 |
|
| 343 |
else:
|
| 344 |
if latents.shape != shape:
|