Spaces:

ShuoChen20
/

DimensionX

Running on Zero

App Files Community

陈硕 committed on Nov 13, 2024

Commit

3a2f1ee

1 Parent(s): f8acb76

update orbit lora

Browse files

Files changed (1) hide show

app.py +35 -6

app.py CHANGED Viewed

@@ -55,11 +55,21 @@ pipe_image = CogVideoXImageToVideoPipeline.from_pretrained(
     text_encoder=pipe.text_encoder,
     torch_dtype=torch.bfloat16,
 )
-lora_path = "wenqsun/DimensionX"
-lora_rank = 256
-pipe_image.load_lora_weights(lora_path, weight_name="orbit_left_lora_weights.safetensors", adapter_name="orbit_left")
-pipe_image.fuse_lora(lora_scale=1 / lora_rank)
-pipe_image = pipe_image.to(device)
 # pipe.transformer.to(memory_format=torch.channels_last)
@@ -213,6 +223,7 @@ def convert_prompt(prompt: str, retry_times: int = 3) -> str:
 @spaces.GPU
 def infer(
     prompt: str,
     image_input: str,
     num_inference_steps: int,
     guidance_scale: float,
@@ -235,6 +246,16 @@ def infer(
     #         guidance_scale=guidance_scale,
     #         generator=torch.Generator(device="cpu").manual_seed(seed),
     #     ).frames
     if image_input is not None:
         image_input = Image.fromarray(image_input).resize(size=(720, 480))  # Convert to PIL
         image = load_image(image_input)
@@ -301,6 +322,12 @@ with gr.Blocks() as demo:
             </div>
            """)
     with gr.Row():
         with gr.Column():
             with gr.Accordion("I2V: Image Input (cannot be used simultaneously with video input)", open=False):
                 image_input = gr.Image(label="Input Image (will be cropped to 720 * 480)")
@@ -340,6 +367,7 @@ with gr.Blocks() as demo:
     def generate(
         prompt,
         image_input,
         # video_input,
         # video_strength,
@@ -350,6 +378,7 @@ with gr.Blocks() as demo:
     ):
         latents, seed = infer(
             prompt,
             image_input,
             # video_input,
             # video_strength,
@@ -386,7 +415,7 @@ with gr.Blocks() as demo:
     generate_button.click(
         generate,
-        inputs=[prompt, image_input, seed_param, enable_scale, enable_rife],
         outputs=[video_output, download_video_button, download_gif_button, seed_text],
     )

     text_encoder=pipe.text_encoder,
     torch_dtype=torch.bfloat16,
 )
+os.makedirs("checkpoints", exist_ok=True)
+# Download LoRA weights
+hf_hub_download(
+    repo_id="wenqsun/DimensionX",
+    filename="orbit_left_lora_weights.safetensors",
+    local_dir="checkpoints"
+)
+hf_hub_download(
+    repo_id="wenqsun/DimensionX",
+    filename="orbit_up_lora_weights.safetensors",
+    local_dir="checkpoints"
+)
 # pipe.transformer.to(memory_format=torch.channels_last)
 @spaces.GPU
 def infer(
     prompt: str,
+    orbit_type: str,
     image_input: str,
     num_inference_steps: int,
     guidance_scale: float,
     #         guidance_scale=guidance_scale,
     #         generator=torch.Generator(device="cpu").manual_seed(seed),
     #     ).frames
+    lora_path = "checkpoints/"
+    weight_name = "orbit_left_lora_weights.safetensors" if orbit_type == "Left" else "orbit_up_lora_weights.safetensors"
+    lora_rank = 256
+    adapter_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    # Load LoRA weights on CPU
+    pipe.load_lora_weights(lora_path, weight_name=weight_name, adapter_name=f"adapter_{adapter_timestamp}")
+    pipe.fuse_lora(lora_scale=1 / lora_rank)
     if image_input is not None:
         image_input = Image.fromarray(image_input).resize(size=(720, 480))  # Convert to PIL
         image = load_image(image_input)
             </div>
            """)
     with gr.Row():
+        with gr.Column():
+                image_in = gr.Image(label="Image Input", type="filepath")
+                prompt = gr.Textbox(label="Prompt")
+                orbit_type = gr.Radio(label="Orbit type", choices=["Left", "Up"], value="Left", interactive=True)
+                submit_btn = gr.Button("Submit")
         with gr.Column():
             with gr.Accordion("I2V: Image Input (cannot be used simultaneously with video input)", open=False):
                 image_input = gr.Image(label="Input Image (will be cropped to 720 * 480)")
     def generate(
         prompt,
+        orbit_type,
         image_input,
         # video_input,
         # video_strength,
     ):
         latents, seed = infer(
             prompt,
+            orbit_type,
             image_input,
             # video_input,
             # video_strength,
     generate_button.click(
         generate,
+        inputs=[prompt, orbit_type, image_input, seed_param, enable_scale, enable_rife],
         outputs=[video_output, download_video_button, download_gif_button, seed_text],
     )