HunYuan-Keyframe2VID-Control-Lora

Build error

App Files Files Community

LPX55 committed on Mar 19, 2025

Commit

5a8cf56

verified ·

1 Parent(s): 6e842f9

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -35

app.py CHANGED Viewed

@@ -34,6 +34,35 @@ video_transforms = transforms.Compose(
         transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
     ]
 )
 def resize_image_to_bucket(image: Union[Image.Image, np.ndarray], bucket_reso: Tuple[int, int]) -> np.ndarray:
     """
@@ -65,37 +94,6 @@ def resize_image_to_bucket(image: Union[Image.Image, np.ndarray], bucket_reso: T
     image = image[crop_top : crop_top + bucket_height, crop_left : crop_left + bucket_width]
     return image
-def construct_video_pipeline(model_id: str, lora_path: str):
-    # Load model and LORA
-    transformer = HunyuanVideoTransformer3DModel.from_pretrained(model_id, subfolder="transformer", torch_dtype=torch.bfloat16)
-    pipe = HunyuanVideoPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch.bfloat16)
-    # Enable memory savings
-    pipe.vae.enable_tiling()
-    pipe.enable_model_cpu_offload()
-    with torch.no_grad():  # enable image inputs
-        initial_input_channels = pipe.transformer.config.in_channels
-        new_img_in = HunyuanVideoPatchEmbed(
-            patch_size=(pipe.transformer.config.patch_size_t, pipe.transformer.config.patch_size, pipe.transformer.config.patch_size),
-            in_chans=pipe.transformer.config.in_channels * 2,
-            embed_dim=pipe.transformer.config.num_attention_heads * pipe.transformer.config.attention_head_dim,
-        )
-        new_img_in = new_img_in.to(pipe.device, dtype=pipe.dtype)
-        new_img_in.proj.weight.zero_()
-        new_img_in.proj.weight[:, :initial_input_channels].copy_(pipe.transformer.x_embedder.proj.weight)
-        if pipe.transformer.x_embedder.proj.bias is not None:
-            new_img_in.proj.bias.copy_(pipe.transformer.x_embedder.proj.bias)
-        pipe.transformer.x_embedder = new_img_in
-    lora_state_dict = safetensors.torch.load_file(lora_path, device="cpu")
-    transformer_lora_state_dict = {f'{k.replace("transformer.", "")}': v for k, v in lora_state_dict.items() if k.startswith("transformer.") and "lora" in k}
-    pipe.load_lora_into_transformer(transformer_lora_state_dict, transformer=pipe.transformer, adapter_name="i2v", _pipeline=pipe)
-    pipe.set_adapters(["i2v"], adapter_weights=[1.0])
-    pipe.fuse_lora(components=["transformer"], lora_scale=1.0, adapter_names=["i2v"])
-    pipe.unload_lora_weights()
-    return pipe
 def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
     # Load and preprocess frames
@@ -317,13 +315,11 @@ def main():
     outputs = [
         gr.Video(label="Generated Video"),
     ]
-        # Create a wrapper function to pass the pre-initialized pipeline
-    def generate_video_wrapper(*args):
-        return generate_video(pipe, *args)
     # Create the Gradio interface
     iface = gr.Interface(
-        fn=generate_video_wrapper,
         inputs=inputs,
         outputs=outputs,
         title="Hunyuan Video Generator",

         transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], inplace=True),
     ]
 )
+model_id = "hunyuanvideo-community/HunyuanVideo"
+lora_path = hf_hub_download("dashtoon/hunyuan-video-keyframe-control-lora", "i2v.sft")  # Replace with the actual LORA path
+transformer = HunyuanVideoTransformer3DModel.from_pretrained(model_id, subfolder="transformer", torch_dtype=torch.bfloat16)
+pipe = HunyuanVideoPipeline.from_pretrained(model_id, transformer=transformer, torch_dtype=torch.bfloat16)
+# Enable memory savings
+pipe.vae.enable_tiling()
+pipe.enable_model_cpu_offload()
+with torch.no_grad():  # enable image inputs
+    initial_input_channels = pipe.transformer.config.in_channels
+    new_img_in = HunyuanVideoPatchEmbed(
+        patch_size=(pipe.transformer.config.patch_size_t, pipe.transformer.config.patch_size, pipe.transformer.config.patch_size),
+        in_chans=pipe.transformer.config.in_channels * 2,
+        embed_dim=pipe.transformer.config.num_attention_heads * pipe.transformer.config.attention_head_dim,
+    )
+    new_img_in = new_img_in.to(pipe.device, dtype=pipe.dtype)
+    new_img_in.proj.weight.zero_()
+    new_img_in.proj.weight[:, :initial_input_channels].copy_(pipe.transformer.x_embedder.proj.weight)
+    if pipe.transformer.x_embedder.proj.bias is not None:
+        new_img_in.proj.bias.copy_(pipe.transformer.x_embedder.proj.bias)
+    pipe.transformer.x_embedder = new_img_in
+lora_state_dict = safetensors.torch.load_file(lora_path, device="cpu")
+transformer_lora_state_dict = {f'{k.replace("transformer.", "")}': v for k, v in lora_state_dict.items() if k.startswith("transformer.") and "lora" in k}
+pipe.load_lora_into_transformer(transformer_lora_state_dict, transformer=pipe.transformer, adapter_name="i2v", _pipeline=pipe)
+pipe.set_adapters(["i2v"], adapter_weights=[1.0])
+pipe.fuse_lora(components=["transformer"], lora_scale=1.0, adapter_names=["i2v"])
+pipe.unload_lora_weights()
 def resize_image_to_bucket(image: Union[Image.Image, np.ndarray], bucket_reso: Tuple[int, int]) -> np.ndarray:
     """
     image = image[crop_top : crop_top + bucket_height, crop_left : crop_left + bucket_width]
     return image
 def generate_video(pipe, prompt: str, frame1_path: str, frame2_path: str, guidance_scale: float, num_frames: int, num_inference_steps: int) -> bytes:
     # Load and preprocess frames
     outputs = [
         gr.Video(label="Generated Video"),
     ]
     # Create the Gradio interface
     iface = gr.Interface(
+        fn=generate_video,
         inputs=inputs,
         outputs=outputs,
         title="Hunyuan Video Generator",