Spaces:
Runtime error
Runtime error
[start_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
Browse files
app.py
CHANGED
|
@@ -381,12 +381,12 @@ def worker(input_image, prompts, n_prompt, seed, resolution, total_second_length
|
|
| 381 |
|
| 382 |
if not high_vram:
|
| 383 |
load_model_as_complete(image_encoder, target_device=gpu)
|
|
|
|
|
|
|
| 384 |
|
| 385 |
-
return start_latent
|
| 386 |
|
| 387 |
-
start_latent = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
|
| 388 |
-
|
| 389 |
-
image_encoder_last_hidden_state = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder).last_hidden_state
|
| 390 |
|
| 391 |
# Dtype
|
| 392 |
|
|
@@ -602,12 +602,12 @@ def worker_last_frame(input_image, prompts, n_prompt, seed, resolution, total_se
|
|
| 602 |
|
| 603 |
if not high_vram:
|
| 604 |
load_model_as_complete(image_encoder, target_device=gpu)
|
|
|
|
|
|
|
| 605 |
|
| 606 |
-
return start_latent
|
| 607 |
|
| 608 |
-
start_latent = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
|
| 609 |
-
|
| 610 |
-
image_encoder_last_hidden_state = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder).last_hidden_state
|
| 611 |
|
| 612 |
# Dtype
|
| 613 |
|
|
|
|
| 381 |
|
| 382 |
if not high_vram:
|
| 383 |
load_model_as_complete(image_encoder, target_device=gpu)
|
| 384 |
+
|
| 385 |
+
image_encoder_last_hidden_state = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder).last_hidden_state
|
| 386 |
|
| 387 |
+
return [start_latent, image_encoder_last_hidden_state]
|
| 388 |
|
| 389 |
+
[start_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
|
|
|
|
|
|
|
| 390 |
|
| 391 |
# Dtype
|
| 392 |
|
|
|
|
| 602 |
|
| 603 |
if not high_vram:
|
| 604 |
load_model_as_complete(image_encoder, target_device=gpu)
|
| 605 |
+
|
| 606 |
+
image_encoder_last_hidden_state = hf_clip_vision_encode(input_image_np, feature_extractor, image_encoder).last_hidden_state
|
| 607 |
|
| 608 |
+
return [start_latent, image_encoder_last_hidden_state]
|
| 609 |
|
| 610 |
+
[start_latent, image_encoder_last_hidden_state] = get_start_latent(input_image, height, width, vae, gpu, image_encoder, high_vram)
|
|
|
|
|
|
|
| 611 |
|
| 612 |
# Dtype
|
| 613 |
|