Spaces: Running on Zero
<fix> Keep only the LoRA weight loading in process_image_and_text.
Browse files
app.py
CHANGED
|
@@ -52,6 +52,9 @@ def init_basemodel():
|
|
| 52 |
current_task = None
|
| 53 |
|
| 54 |
# init models
|
|
|
|
|
|
|
|
|
|
| 55 |
scheduler = diffusers.FlowMatchEulerDiscreteScheduler()
|
| 56 |
vae = diffusers.AutoencoderKLHunyuanVideo.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
|
| 57 |
subfolder="vae")
|
|
@@ -69,12 +72,25 @@ def init_basemodel():
|
|
| 69 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 70 |
weight_dtype = torch.bfloat16
|
| 71 |
|
|
|
|
| 72 |
vae.requires_grad_(False).to(device, dtype=weight_dtype)
|
| 73 |
text_encoder.requires_grad_(False).to(device, dtype=weight_dtype)
|
| 74 |
text_encoder_2.requires_grad_(False).to(device, dtype=weight_dtype)
|
|
|
|
| 75 |
vae.enable_tiling()
|
| 76 |
vae.enable_slicing()
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
|
| 79 |
@spaces.GPU
|
| 80 |
def process_image_and_text(condition_image, target_prompt, condition_image_prompt, task, random_seed, inpainting, fill_x1, fill_x2, fill_y1, fill_y2):
|
|
@@ -83,14 +99,6 @@ def process_image_and_text(condition_image, target_prompt, condition_image_promp
|
|
| 83 |
if pipe is None or current_task != task:
|
| 84 |
current_task = task
|
| 85 |
|
| 86 |
-
# init transformer
|
| 87 |
-
global transformer
|
| 88 |
-
transformer = HunyuanVideoTransformer3DModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
|
| 89 |
-
subfolder="transformer",
|
| 90 |
-
inference_subject_driven=task in ['subject_driven'])
|
| 91 |
-
transformer.requires_grad_(False)
|
| 92 |
-
transformer.to("cuda" if torch.cuda.is_available() else "cpu", dtype=torch.bfloat16)
|
| 93 |
-
|
| 94 |
# insert LoRA
|
| 95 |
lora_config = LoraConfig(
|
| 96 |
r=16,
|
|
@@ -164,17 +172,6 @@ def process_image_and_text(condition_image, target_prompt, condition_image_promp
|
|
| 164 |
|
| 165 |
transformer.requires_grad_(False)
|
| 166 |
|
| 167 |
-
pipe = HunyuanVideoImageToVideoPipeline(
|
| 168 |
-
text_encoder=text_encoder,
|
| 169 |
-
tokenizer=tokenizer,
|
| 170 |
-
transformer=transformer,
|
| 171 |
-
vae=vae,
|
| 172 |
-
scheduler=copy.deepcopy(scheduler),
|
| 173 |
-
text_encoder_2=text_encoder_2,
|
| 174 |
-
tokenizer_2=tokenizer_2,
|
| 175 |
-
image_processor=image_processor,
|
| 176 |
-
)
|
| 177 |
-
|
| 178 |
# start generation
|
| 179 |
c_txt = None if condition_image_prompt == "" else condition_image_prompt
|
| 180 |
c_img = condition_image.resize((512, 512))
|
|
|
|
| 52 |
current_task = None
|
| 53 |
|
| 54 |
# init models
|
| 55 |
+
transformer = HunyuanVideoTransformer3DModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
|
| 56 |
+
subfolder="transformer",
|
| 57 |
+
inference_subject_driven=False)
|
| 58 |
scheduler = diffusers.FlowMatchEulerDiscreteScheduler()
|
| 59 |
vae = diffusers.AutoencoderKLHunyuanVideo.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
|
| 60 |
subfolder="vae")
|
|
|
|
| 72 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 73 |
weight_dtype = torch.bfloat16
|
| 74 |
|
| 75 |
+
transformer.requires_grad_(False)
|
| 76 |
vae.requires_grad_(False).to(device, dtype=weight_dtype)
|
| 77 |
text_encoder.requires_grad_(False).to(device, dtype=weight_dtype)
|
| 78 |
text_encoder_2.requires_grad_(False).to(device, dtype=weight_dtype)
|
| 79 |
+
transformer.to(device, dtype=weight_dtype)
|
| 80 |
vae.enable_tiling()
|
| 81 |
vae.enable_slicing()
|
| 82 |
|
| 83 |
+
pipe = HunyuanVideoImageToVideoPipeline(
|
| 84 |
+
text_encoder=text_encoder,
|
| 85 |
+
tokenizer=tokenizer,
|
| 86 |
+
transformer=transformer,
|
| 87 |
+
vae=vae,
|
| 88 |
+
scheduler=copy.deepcopy(scheduler),
|
| 89 |
+
text_encoder_2=text_encoder_2,
|
| 90 |
+
tokenizer_2=tokenizer_2,
|
| 91 |
+
image_processor=image_processor,
|
| 92 |
+
)
|
| 93 |
+
|
| 94 |
|
| 95 |
@spaces.GPU
|
| 96 |
def process_image_and_text(condition_image, target_prompt, condition_image_prompt, task, random_seed, inpainting, fill_x1, fill_x2, fill_y1, fill_y2):
|
|
|
|
| 99 |
if pipe is None or current_task != task:
|
| 100 |
current_task = task
|
| 101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
# insert LoRA
|
| 103 |
lora_config = LoraConfig(
|
| 104 |
r=16,
|
|
|
|
| 172 |
|
| 173 |
transformer.requires_grad_(False)
|
| 174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
# start generation
|
| 176 |
c_txt = None if condition_image_prompt == "" else condition_image_prompt
|
| 177 |
c_img = condition_image.resize((512, 512))
|