Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
9530e57
1
Parent(s):
6d0f162
Awesome new Lora
Browse files
app.py
CHANGED
|
@@ -5,6 +5,8 @@ from diffusers.utils import export_to_video
|
|
| 5 |
from transformers import CLIPVisionModel
|
| 6 |
import gradio as gr
|
| 7 |
import tempfile
|
|
|
|
|
|
|
| 8 |
|
| 9 |
from huggingface_hub import hf_hub_download
|
| 10 |
import numpy as np
|
|
@@ -12,15 +14,12 @@ from PIL import Image
|
|
| 12 |
import random
|
| 13 |
|
| 14 |
# Base MODEL_ID (using original Wan model that's compatible with diffusers)
|
| 15 |
-
MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-
|
| 16 |
|
| 17 |
-
# FusionX enhancement
|
| 18 |
-
LORA_REPO_ID = "
|
| 19 |
-
LORA_FILENAME = "
|
| 20 |
-
|
| 21 |
-
# Additional enhancement LoRAs for FusionX-like quality
|
| 22 |
-
ACCVIDEO_LORA_REPO = "alibaba-pai/Wan2.1-Fun-Reward-LoRAs"
|
| 23 |
-
MPS_LORA_FILENAME = "Wan2.1-Fun-14B-InP-MPS.safetensors"
|
| 24 |
|
| 25 |
# Load enhanced model components
|
| 26 |
print("π Loading FusionX Enhanced Wan2.1 I2V Model...")
|
|
@@ -34,37 +33,18 @@ pipe = WanImageToVideoPipeline.from_pretrained(
|
|
| 34 |
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
|
| 35 |
pipe.to("cuda")
|
| 36 |
|
| 37 |
-
# Load FusionX
|
| 38 |
-
lora_adapters = []
|
| 39 |
-
lora_weights = []
|
| 40 |
-
|
| 41 |
-
try:
|
| 42 |
-
# Load CausVid LoRA (strength 1.0 as per FusionX)
|
| 43 |
-
causvid_path = hf_hub_download(repo_id=LORA_REPO_ID, filename=LORA_FILENAME)
|
| 44 |
-
pipe.load_lora_weights(causvid_path, adapter_name="causvid_lora")
|
| 45 |
-
lora_adapters.append("causvid_lora")
|
| 46 |
-
lora_weights.append(1.0) # FusionX uses 1.0 for CausVid
|
| 47 |
-
print("β
CausVid LoRA loaded (strength: 1.0)")
|
| 48 |
-
except Exception as e:
|
| 49 |
-
print(f"β οΈ CausVid LoRA not loaded: {e}")
|
| 50 |
-
|
| 51 |
try:
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
| 58 |
except Exception as e:
|
| 59 |
-
print(f"β οΈ
|
| 60 |
-
|
| 61 |
-
# Apply LoRA adapters if any were loaded
|
| 62 |
-
if lora_adapters:
|
| 63 |
-
pipe.set_adapters(lora_adapters, adapter_weights=lora_weights)
|
| 64 |
-
pipe.fuse_lora()
|
| 65 |
-
print(f"π₯ FusionX Enhancement Applied: {len(lora_adapters)} LoRAs fused")
|
| 66 |
-
else:
|
| 67 |
-
print("π No LoRAs loaded - using base Wan model")
|
| 68 |
|
| 69 |
MOD_VALUE = 32
|
| 70 |
DEFAULT_H_SLIDER_VALUE = 576 # FusionX optimized default
|
|
@@ -288,6 +268,17 @@ input[type="checkbox"] {
|
|
| 288 |
}
|
| 289 |
"""
|
| 290 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
|
| 292 |
min_slider_h, max_slider_h,
|
| 293 |
min_slider_w, max_slider_w,
|
|
@@ -325,7 +316,7 @@ def handle_image_upload_for_dims_wan(uploaded_pil_image, current_h_val, current_
|
|
| 325 |
|
| 326 |
def get_duration(input_image, prompt, height, width,
|
| 327 |
negative_prompt, duration_seconds,
|
| 328 |
-
guidance_scale, steps,
|
| 329 |
seed, randomize_seed,
|
| 330 |
progress):
|
| 331 |
# FusionX optimized duration calculation
|
|
@@ -339,7 +330,7 @@ def get_duration(input_image, prompt, height, width,
|
|
| 339 |
@spaces.GPU(duration=get_duration)
|
| 340 |
def generate_video(input_image, prompt, height, width,
|
| 341 |
negative_prompt=default_negative_prompt, duration_seconds=3,
|
| 342 |
-
guidance_scale=1, steps=8,
|
| 343 |
seed=42, randomize_seed=False,
|
| 344 |
progress=gr.Progress(track_tqdm=True)):
|
| 345 |
|
|
@@ -368,11 +359,17 @@ def generate_video(input_image, prompt, height, width,
|
|
| 368 |
num_frames=num_frames,
|
| 369 |
guidance_scale=float(guidance_scale),
|
| 370 |
num_inference_steps=int(steps),
|
| 371 |
-
generator=torch.Generator(device="cuda").manual_seed(current_seed)
|
|
|
|
| 372 |
).frames[0]
|
| 373 |
|
| 374 |
-
|
| 375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
|
| 377 |
return video_path, current_seed
|
| 378 |
|
|
@@ -439,6 +436,14 @@ with gr.Blocks() as demo:
|
|
| 439 |
value=DEFAULT_W_SLIDER_VALUE,
|
| 440 |
label=f"π Output Width (FusionX optimized: {MOD_VALUE} multiples)"
|
| 441 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
steps_slider = gr.Slider(
|
| 443 |
minimum=1,
|
| 444 |
maximum=20,
|
|
@@ -466,7 +471,8 @@ with gr.Blocks() as demo:
|
|
| 466 |
video_output = gr.Video(
|
| 467 |
label="π₯ FusionX Enhanced Generated Video",
|
| 468 |
autoplay=True,
|
| 469 |
-
interactive=False
|
|
|
|
| 470 |
)
|
| 471 |
|
| 472 |
input_image_component.upload(
|
|
@@ -484,23 +490,10 @@ with gr.Blocks() as demo:
|
|
| 484 |
ui_inputs = [
|
| 485 |
input_image_component, prompt_input, height_input, width_input,
|
| 486 |
negative_prompt_input, duration_seconds_input,
|
| 487 |
-
guidance_scale_input, steps_slider, seed_input, randomize_seed_checkbox
|
| 488 |
]
|
| 489 |
generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
|
| 490 |
|
| 491 |
-
with gr.Column():
|
| 492 |
-
gr.Examples(
|
| 493 |
-
examples=[
|
| 494 |
-
["peng.png", "a penguin gracefully dancing in the pristine snow, cinematic motion with detailed feathers", 576, 576],
|
| 495 |
-
["frog.jpg", "the frog jumps energetically with smooth, lifelike motion and detailed texture", 576, 576],
|
| 496 |
-
],
|
| 497 |
-
inputs=[input_image_component, prompt_input, height_input, width_input],
|
| 498 |
-
outputs=[video_output, seed_input],
|
| 499 |
-
fn=generate_video,
|
| 500 |
-
cache_examples="lazy",
|
| 501 |
-
label="π FusionX Enhanced Example Gallery"
|
| 502 |
-
)
|
| 503 |
-
|
| 504 |
|
| 505 |
if __name__ == "__main__":
|
| 506 |
demo.queue().launch()
|
|
|
|
| 5 |
from transformers import CLIPVisionModel
|
| 6 |
import gradio as gr
|
| 7 |
import tempfile
|
| 8 |
+
import re
|
| 9 |
+
import os
|
| 10 |
|
| 11 |
from huggingface_hub import hf_hub_download
|
| 12 |
import numpy as np
|
|
|
|
| 14 |
import random
|
| 15 |
|
| 16 |
# Base MODEL_ID (using original Wan model that's compatible with diffusers)
|
| 17 |
+
MODEL_ID = "Wan-AI/Wan2.1-I2V-14B-720P-Diffusers"
|
| 18 |
|
| 19 |
+
# Merged FusionX enhancement LoRA
|
| 20 |
+
LORA_REPO_ID = "vrgamedevgirl84/Wan14BT2VFusioniX"
|
| 21 |
+
LORA_FILENAME = "Wan2.1_I2V_14B_FusionX_LoRA.safetensors"
|
| 22 |
+
LORA_SUBFOLDER = "FusionX_LoRa"
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# Load enhanced model components
|
| 25 |
print("π Loading FusionX Enhanced Wan2.1 I2V Model...")
|
|
|
|
| 33 |
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
|
| 34 |
pipe.to("cuda")
|
| 35 |
|
| 36 |
+
# Load and fuse the single merged FusionX LoRA
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
try:
|
| 38 |
+
lora_path = hf_hub_download(
|
| 39 |
+
repo_id=LORA_REPO_ID,
|
| 40 |
+
filename=LORA_FILENAME,
|
| 41 |
+
subfolder=LORA_SUBFOLDER
|
| 42 |
+
)
|
| 43 |
+
pipe.load_lora_weights(lora_path, adapter_name="fusionx")
|
| 44 |
+
print("β
Merged FusionX LoRA loaded. Use the 'LoRA Strength' slider to control the effect.")
|
| 45 |
except Exception as e:
|
| 46 |
+
print(f"β οΈ Merged FusionX LoRA not loaded: {e}")
|
| 47 |
+
print("π Using base Wan model without LoRA enhancement. The 'LoRA Strength' slider will have no effect.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
MOD_VALUE = 32
|
| 50 |
DEFAULT_H_SLIDER_VALUE = 576 # FusionX optimized default
|
|
|
|
| 268 |
}
|
| 269 |
"""
|
| 270 |
|
| 271 |
+
def sanitize_prompt_for_filename(prompt: str, max_len: int = 60) -> str:
    """Return a filesystem-friendly slug derived from *prompt*.

    Characters other than word characters, whitespace, hyphens and
    underscores are dropped; every run of whitespace/hyphens/underscores is
    collapsed into a single underscore; the result is truncated to
    *max_len* characters.

    Args:
        prompt: Free-form user prompt. May be empty or ``None``.
        max_len: Maximum length of the returned slug.

    Returns:
        The sanitized slug, or ``"video"`` when the prompt is empty or
        contains nothing usable (for example only punctuation or spaces).
    """
    fallback = "video"
    if not prompt:
        return fallback

    # Drop everything that is not a word character, whitespace, '-' or '_'.
    cleaned = re.sub(r"[^\w\s-]", "", prompt).strip()
    # Collapse separator runs into one underscore and trim separators at the
    # edges so the name never starts or ends with a dangling '_'.
    slug = re.sub(r"[\s_-]+", "_", cleaned).strip("_")
    # Truncation can cut right after a separator; trim again afterwards.
    slug = slug[:max_len].rstrip("_")

    # A prompt made only of punctuation/whitespace would otherwise yield an
    # empty name and a download called "_<seed>.mp4".
    return slug or fallback
|
| 281 |
+
|
| 282 |
def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
|
| 283 |
min_slider_h, max_slider_h,
|
| 284 |
min_slider_w, max_slider_w,
|
|
|
|
| 316 |
|
| 317 |
def get_duration(input_image, prompt, height, width,
|
| 318 |
negative_prompt, duration_seconds,
|
| 319 |
+
guidance_scale, steps, lora_scale,
|
| 320 |
seed, randomize_seed,
|
| 321 |
progress):
|
| 322 |
# FusionX optimized duration calculation
|
|
|
|
| 330 |
@spaces.GPU(duration=get_duration)
|
| 331 |
def generate_video(input_image, prompt, height, width,
|
| 332 |
negative_prompt=default_negative_prompt, duration_seconds=3,
|
| 333 |
+
guidance_scale=1, steps=8, lora_scale=1.0,
|
| 334 |
seed=42, randomize_seed=False,
|
| 335 |
progress=gr.Progress(track_tqdm=True)):
|
| 336 |
|
|
|
|
| 359 |
num_frames=num_frames,
|
| 360 |
guidance_scale=float(guidance_scale),
|
| 361 |
num_inference_steps=int(steps),
|
| 362 |
+
generator=torch.Generator(device="cuda").manual_seed(current_seed),
|
| 363 |
+
cross_attention_kwargs={"scale": float(lora_scale)}
|
| 364 |
).frames[0]
|
| 365 |
|
| 366 |
+
# Create a unique filename for download
|
| 367 |
+
sanitized_prompt = sanitize_prompt_for_filename(prompt)
|
| 368 |
+
filename = f"{sanitized_prompt}_{current_seed}.mp4"
|
| 369 |
+
|
| 370 |
+
temp_dir = tempfile.mkdtemp()
|
| 371 |
+
video_path = os.path.join(temp_dir, filename)
|
| 372 |
+
|
| 373 |
export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
|
| 374 |
return video_path, current_seed
|
| 375 |
|
|
|
|
| 436 |
value=DEFAULT_W_SLIDER_VALUE,
|
| 437 |
label=f"π Output Width (FusionX optimized: {MOD_VALUE} multiples)"
|
| 438 |
)
|
| 439 |
+
lora_scale_slider = gr.Slider(
|
| 440 |
+
minimum=0.0,
|
| 441 |
+
maximum=2.5,
|
| 442 |
+
step=0.05,
|
| 443 |
+
value=1.0,
|
| 444 |
+
label="πͺ FusionX LoRA Strength",
|
| 445 |
+
info="Control the intensity of the FusionX effect. >1.0 for stronger effect, <1.0 for less."
|
| 446 |
+
)
|
| 447 |
steps_slider = gr.Slider(
|
| 448 |
minimum=1,
|
| 449 |
maximum=20,
|
|
|
|
| 471 |
video_output = gr.Video(
|
| 472 |
label="π₯ FusionX Enhanced Generated Video",
|
| 473 |
autoplay=True,
|
| 474 |
+
interactive=False,
|
| 475 |
+
download=True
|
| 476 |
)
|
| 477 |
|
| 478 |
input_image_component.upload(
|
|
|
|
| 490 |
ui_inputs = [
|
| 491 |
input_image_component, prompt_input, height_input, width_input,
|
| 492 |
negative_prompt_input, duration_seconds_input,
|
| 493 |
+
guidance_scale_input, steps_slider, lora_scale_slider, seed_input, randomize_seed_checkbox
|
| 494 |
]
|
| 495 |
generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
|
| 496 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
|
| 498 |
if __name__ == "__main__":
|
| 499 |
demo.queue().launch()
|