Spaces: Running on Zero
<fix> move transformer init to process_image_and_text.
Browse files
app.py
CHANGED
|
@@ -52,9 +52,6 @@ def init_basemodel():
|
|
| 52 |
global transformer, scheduler, vae, text_encoder, text_encoder_2, tokenizer, tokenizer_2, image_processor
|
| 53 |
|
| 54 |
# init models
|
| 55 |
-
transformer = HunyuanVideoTransformer3DModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
|
| 56 |
-
subfolder="transformer",
|
| 57 |
-
inference_subject_driven=task in ['subject_driven'])
|
| 58 |
scheduler = diffusers.FlowMatchEulerDiscreteScheduler()
|
| 59 |
vae = diffusers.AutoencoderKLHunyuanVideo.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
|
| 60 |
subfolder="vae")
|
|
@@ -72,11 +69,9 @@ def init_basemodel():
|
|
| 72 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 73 |
weight_dtype = torch.bfloat16
|
| 74 |
|
| 75 |
-
transformer.requires_grad_(False)
|
| 76 |
vae.requires_grad_(False).to(device, dtype=weight_dtype)
|
| 77 |
text_encoder.requires_grad_(False).to(device, dtype=weight_dtype)
|
| 78 |
text_encoder_2.requires_grad_(False).to(device, dtype=weight_dtype)
|
| 79 |
-
transformer.to(device, dtype=weight_dtype)
|
| 80 |
vae.enable_tiling()
|
| 81 |
vae.enable_slicing()
|
| 82 |
|
|
@@ -85,6 +80,13 @@ def init_basemodel():
|
|
| 85 |
def process_image_and_text(condition_image, target_prompt, condition_image_prompt, task, random_seed, inpainting, fill_x1, fill_x2, fill_y1, fill_y2):
|
| 86 |
# set up the model
|
| 87 |
if pipe is None or current_task != task:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
# insert LoRA
|
| 89 |
lora_config = LoraConfig(
|
| 90 |
r=16,
|
|
|
|
| 52 |
global transformer, scheduler, vae, text_encoder, text_encoder_2, tokenizer, tokenizer_2, image_processor
|
| 53 |
|
| 54 |
# init models
|
|
|
|
|
|
|
|
|
|
| 55 |
scheduler = diffusers.FlowMatchEulerDiscreteScheduler()
|
| 56 |
vae = diffusers.AutoencoderKLHunyuanVideo.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
|
| 57 |
subfolder="vae")
|
|
|
|
| 69 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 70 |
weight_dtype = torch.bfloat16
|
| 71 |
|
|
|
|
| 72 |
vae.requires_grad_(False).to(device, dtype=weight_dtype)
|
| 73 |
text_encoder.requires_grad_(False).to(device, dtype=weight_dtype)
|
| 74 |
text_encoder_2.requires_grad_(False).to(device, dtype=weight_dtype)
|
|
|
|
| 75 |
vae.enable_tiling()
|
| 76 |
vae.enable_slicing()
|
| 77 |
|
|
|
|
| 80 |
def process_image_and_text(condition_image, target_prompt, condition_image_prompt, task, random_seed, inpainting, fill_x1, fill_x2, fill_y1, fill_y2):
|
| 81 |
# set up the model
|
| 82 |
if pipe is None or current_task != task:
|
| 83 |
+
# init transformer
|
| 84 |
+
transformer = HunyuanVideoTransformer3DModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
|
| 85 |
+
subfolder="transformer",
|
| 86 |
+
inference_subject_driven=task in ['subject_driven'])
|
| 87 |
+
transformer.requires_grad_(False)
|
| 88 |
+
transformer.to("cuda" if torch.cuda.is_available() else "cpu", dtype=torch.bfloat16)
|
| 89 |
+
|
| 90 |
# insert LoRA
|
| 91 |
lora_config = LoraConfig(
|
| 92 |
r=16,
|