RosticFACE committed on
Commit
0988d71
·
verified ·
1 Parent(s): d5238f5

Upload 5 files

Browse files
Files changed (6) hide show
  1. .gitattributes +2 -0
  2. aoti.py +35 -0
  3. app.py +234 -0
  4. kill_bill.jpeg +3 -0
  5. requirements.txt +11 -0
  6. wan_i2v_input.JPG +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ kill_bill.jpeg filter=lfs diff=lfs merge=lfs -text
37
+ wan_i2v_input.JPG filter=lfs diff=lfs merge=lfs -text
aoti.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ """
3
+
4
+ from typing import cast
5
+
6
+ import torch
7
+ from huggingface_hub import hf_hub_download
8
+ from spaces.zero.torch.aoti import ZeroGPUCompiledModel
9
+ from spaces.zero.torch.aoti import ZeroGPUWeights
10
+ from torch._functorch._aot_autograd.subclass_parametrization import unwrap_tensor_subclass_parameters
11
+
12
+
13
def _shallow_clone_module(module: torch.nn.Module) -> torch.nn.Module:
    """Recursively build a shallow copy of *module*.

    The copy shares every parameter and buffer tensor with the original,
    but owns fresh attribute/parameter/buffer/submodule dictionaries, so
    per-clone mutations (e.g. replacing ``forward`` or a parameter entry)
    do not leak back into the source module. ``__init__`` is deliberately
    bypassed via ``object.__new__`` so no weights are re-allocated.
    """
    duplicate = object.__new__(module.__class__)
    duplicate.__dict__ = module.__dict__.copy()
    duplicate._parameters = module._parameters.copy()
    duplicate._buffers = module._buffers.copy()
    children = {
        name: _shallow_clone_module(child)
        for name, child in module._modules.items()
        if child is not None
    }
    duplicate._modules = children
    return duplicate
20
+
21
+
22
def aoti_blocks_load(module: torch.nn.Module, repo_id: str, variant: str | None = None):
    """Swap in ahead-of-time compiled forwards for repeated transformer blocks.

    Args:
        module: Transformer whose ``_repeated_blocks`` attribute lists the
            class names of its repeated block types.
        repo_id: Hub repository containing one ``package.pt2`` artifact per
            block class, stored in a subfolder named after the class.
        variant: Optional artifact variant; when given, the subfolder
            ``"<block>.<variant>"`` is used instead of ``"<block>"``.
    """
    block_names = cast(list[str], module._repeated_blocks)
    # Download every compiled package up front.
    packages: dict[str, str] = {}
    for name in block_names:
        folder = name if variant is None else f'{name}.{variant}'
        packages[name] = hf_hub_download(
            repo_id=repo_id,
            filename='package.pt2',
            subfolder=folder,
        )
    # Replace the forward of every matching submodule with the compiled model.
    for name, package_file in packages.items():
        for candidate in module.modules():
            if candidate.__class__.__name__ != name:
                continue
            # Work on a shallow clone so unwrapping tensor-subclass
            # parameters does not mutate the live module.
            scratch = _shallow_clone_module(candidate)
            unwrap_tensor_subclass_parameters(scratch)
            weights = ZeroGPUWeights(scratch.state_dict())
            candidate.forward = ZeroGPUCompiledModel(package_file, weights)
app.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import torch
3
+ from diffusers.pipelines.wan.pipeline_wan_i2v import WanImageToVideoPipeline
4
+ from diffusers.models.transformers.transformer_wan import WanTransformer3DModel
5
+ from diffusers.utils.export_utils import export_to_video
6
+ import gradio as gr
7
+ import tempfile
8
+ import numpy as np
9
+ from PIL import Image
10
+ import random
11
+ import gc
12
+
13
+ from torchao.quantization import quantize_
14
+ from torchao.quantization import Float8DynamicActivationFloat8WeightConfig
15
+ from torchao.quantization import Int8WeightOnlyConfig
16
+
17
+ import aoti
18
+
19
+
20
# Base Wan 2.2 image-to-video checkpoint (diffusers layout).
MODEL_ID = "Wan-AI/Wan2.2-I2V-A14B-Diffusers"

# Output-resolution constraints used by resize_image: the long side is
# capped at MAX_DIM, both sides are clamped to at least MIN_DIM, square
# inputs map to SQUARE_DIM, and final dimensions are snapped to a
# multiple of MULTIPLE_OF.
MAX_DIM = 832
MIN_DIM = 480
SQUARE_DIM = 640
MULTIPLE_OF = 16

# Upper bound for user-provided / randomized seeds.
MAX_SEED = np.iinfo(np.int32).max

FIXED_FPS = 16  # can be changed to 24 for smoother video
MIN_FRAMES_MODEL = 8

# Shortest representable clip in seconds (not currently wired to the UI
# slider, whose minimum is hard-coded to 0.5).
MIN_DURATION = round(MIN_FRAMES_MODEL / FIXED_FPS, 1)
DEFAULT_DURATION = 5.0  # default duration value in seconds
34
+
35
# Assemble the Wan 2.2 image-to-video pipeline. Both expert transformers
# (the UI labels them the high-noise and low-noise guidance stages) are
# loaded from a bf16 repack and placed directly on the GPU.
pipe = WanImageToVideoPipeline.from_pretrained(
    MODEL_ID,
    transformer=WanTransformer3DModel.from_pretrained(
        'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
        subfolder='transformer',
        torch_dtype=torch.bfloat16,
        device_map='cuda',
    ),
    transformer_2=WanTransformer3DModel.from_pretrained(
        'cbensimon/Wan2.2-I2V-A14B-bf16-Diffusers',
        subfolder='transformer_2',
        torch_dtype=torch.bfloat16,
        device_map='cuda',
    ),
    torch_dtype=torch.bfloat16,
).to('cuda')

# Load the same Lightx2v step-distillation LoRA file twice: once for
# `transformer` and once (via load_into_transformer_2) for
# `transformer_2`, then fuse it into each with different scales.
pipe.load_lora_weights(
    "Kijai/WanVideo_comfy",
    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
    adapter_name="lightx2v"
)
kwargs_lora = {"load_into_transformer_2": True}
pipe.load_lora_weights(
    "Kijai/WanVideo_comfy",
    weight_name="Lightx2v/lightx2v_I2V_14B_480p_cfg_step_distill_rank128_bf16.safetensors",
    adapter_name="lightx2v_2",
    **kwargs_lora
)
pipe.set_adapters(["lightx2v", "lightx2v_2"], adapter_weights=[1., 1.])
pipe.fuse_lora(adapter_names=["lightx2v"], lora_scale=3., components=["transformer"])
pipe.fuse_lora(adapter_names=["lightx2v_2"], lora_scale=1., components=["transformer_2"])
pipe.unload_lora_weights()  # adapters are fused; drop the standalone LoRA weights

# Quantize: int8 weight-only for the text encoder, dynamic fp8 for both
# transformers (matching the 'fp8da' AOT artifacts loaded below).
quantize_(pipe.text_encoder, Int8WeightOnlyConfig())
quantize_(pipe.transformer, Float8DynamicActivationFloat8WeightConfig())
quantize_(pipe.transformer_2, Float8DynamicActivationFloat8WeightConfig())

# Replace the repeated transformer blocks' forwards with ahead-of-time
# compiled versions (see aoti.aoti_blocks_load).
aoti.aoti_blocks_load(pipe.transformer, 'zerogpu-aoti/Wan2', variant='fp8da')
aoti.aoti_blocks_load(pipe.transformer_2, 'zerogpu-aoti/Wan2', variant='fp8da')
75
+
76
+
77
# Default motion prompt pre-filled in the UI for image-to-video runs.
default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
# Negative prompt kept verbatim in Chinese (presumably the phrasing the
# Wan checkpoints expect — do not translate the literal). Roughly:
# garish colors, overexposure, static/blurry detail, subtitles,
# painting-like style, gray cast, worst/low quality, JPEG artifacts,
# deformed anatomy (extra fingers, bad hands/faces, fused fingers, three
# legs), frozen frames, cluttered backgrounds, crowds, walking backwards.
default_negative_prompt = (
    "色调艳丽, 过曝, 静态, 细节模糊不清, 字幕, 风格, 作品, 画作, 画面, 静止, "
    "整体发灰, 最差质量, 低质量, JPEG压缩残留, 丑陋的, 残缺的, 多余的手指, "
    "画得不好的手部, 画得不好的脸部, 畸形的, 毁容的, 形态畸形的肢体, 手指融合, "
    "静止不动的画面, 杂乱的背景, 三条腿, 背景人很多, 倒着走"
)
84
+
85
+
86
def resize_image(image: Image.Image) -> Image.Image:
    """Fit *image* into the model's supported resolution range.

    Square images map to SQUARE_DIM x SQUARE_DIM. Otherwise the image is
    center-cropped when its aspect ratio falls outside
    [MIN_DIM/MAX_DIM, MAX_DIM/MIN_DIM], scaled so its long side becomes
    MAX_DIM, and finally both sides are snapped to a multiple of
    MULTIPLE_OF and clamped into [MIN_DIM, MAX_DIM].
    """
    width, height = image.size

    if width == height:
        return image.resize((SQUARE_DIM, SQUARE_DIM), Image.LANCZOS)

    widest = MAX_DIM / MIN_DIM    # largest allowed width/height ratio
    tallest = MIN_DIM / MAX_DIM   # smallest allowed width/height ratio
    ratio = width / height
    source = image

    if ratio > widest:
        # Too wide: crop the horizontal center down to the widest ratio.
        crop_w = int(round(height * widest))
        x0 = (width - crop_w) // 2
        source = image.crop((x0, 0, x0 + crop_w, height))
        target_w = MAX_DIM
        target_h = int(round(target_w / widest))
    elif ratio < tallest:
        # Too tall: crop the vertical center down to the tallest ratio.
        crop_h = int(round(width / tallest))
        y0 = (height - crop_h) // 2
        source = image.crop((0, y0, width, y0 + crop_h))
        target_h = MAX_DIM
        target_w = int(round(target_h * tallest))
    elif ratio > 1:
        # Landscape within range: scale the long side to MAX_DIM.
        target_w = MAX_DIM
        target_h = int(round(target_w / ratio))
    else:
        # Portrait within range: scale the long side to MAX_DIM.
        target_h = MAX_DIM
        target_w = int(round(target_h * ratio))

    def _snap(value: int) -> int:
        # Round to the nearest multiple of MULTIPLE_OF, then clamp.
        rounded = round(value / MULTIPLE_OF) * MULTIPLE_OF
        return max(MIN_DIM, min(MAX_DIM, rounded))

    return source.resize((_snap(target_w), _snap(target_h)), Image.LANCZOS)
126
+
127
+
128
def get_num_frames(duration_seconds: float):
    """Map a duration in seconds to a frame count at FIXED_FPS.

    Intentionally uncapped — the previous MAX_FRAMES_MODEL ceiling was
    removed, so longer durations yield proportionally more frames. One
    extra frame is added on top of duration * FIXED_FPS.
    """
    return int(round(duration_seconds * FIXED_FPS)) + 1
131
+
132
+
133
def get_duration(
    input_image,
    prompt,
    steps,
    negative_prompt,
    duration_seconds,
    guidance_scale,
    guidance_scale_2,
    seed,
    randomize_seed,
    progress,
):
    """Estimate the GPU time budget (in seconds) for one generation.

    Used as the ``duration=`` callback of ``@spaces.GPU`` on
    generate_video, so its parameter list mirrors that function's even
    though only steps, duration_seconds and the input image's size are
    read. The per-step cost is scaled by (frames * width * height)
    relative to a reference workload, superlinearly (** 1.5), plus a
    10-second fixed overhead.
    """
    REFERENCE_WORKLOAD = 81 * 832 * 624  # baseline frames * width * height
    BASE_SECONDS_PER_STEP = 15           # per-step cost at the baseline size
    w, h = resize_image(input_image).size
    frames = get_num_frames(duration_seconds)
    workload_ratio = frames * w * h / REFERENCE_WORKLOAD
    return 10 + int(steps) * BASE_SECONDS_PER_STEP * workload_ratio ** 1.5
152
+
153
+
154
@spaces.GPU(duration=get_duration)
def generate_video(
    input_image,
    prompt,
    steps=4,
    negative_prompt=default_negative_prompt,
    duration_seconds=DEFAULT_DURATION,
    guidance_scale=1,
    guidance_scale_2=1,
    seed=42,
    randomize_seed=False,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the Wan I2V pipeline on one image and export an .mp4.

    Returns a tuple of (path to the generated video file, seed actually
    used) so the UI can display the effective seed. Raises gr.Error when
    no input image was provided.
    """
    if input_image is None:
        raise gr.Error("Please upload an input image.")

    frame_count = get_num_frames(duration_seconds)
    used_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    image = resize_image(input_image)
    generator = torch.Generator(device="cuda").manual_seed(used_seed)

    # The pipeline returns batched frame lists; take the first (only) video.
    result = pipe(
        image=image,
        prompt=prompt,
        negative_prompt=negative_prompt,
        height=image.height,
        width=image.width,
        num_frames=frame_count,
        guidance_scale=float(guidance_scale),
        guidance_scale_2=float(guidance_scale_2),
        num_inference_steps=int(steps),
        generator=generator,
    )
    frames = result.frames[0]

    # Reserve a temp file path; delete=False keeps it for Gradio to serve.
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        out_path = tmp.name

    export_to_video(frames, out_path, fps=FIXED_FPS)
    return out_path, used_seed
192
+
193
+
194
# --- Gradio Interface ---
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Wan 2.2 I2V (14B) — Unlimited Duration Edition 🕒")
    gr.Markdown("Generate cinematic I2V animations without duration limits. Optimized for RTX 4090.")

    with gr.Row():
        with gr.Column():
            input_image_component = gr.Image(type="pil", label="Input Image")
            prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)

            duration_seconds_input = gr.Slider(
                minimum=0.5,
                maximum=60.0,  # can be raised to 120.0
                step=0.5,
                value=DEFAULT_DURATION,
                label="Duration (seconds)",
                info=f"Each second = {FIXED_FPS} frames. Longer videos require more VRAM/time."
            )

            with gr.Accordion("Advanced Settings", open=False):
                negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
                seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
                randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
                steps_slider = gr.Slider(minimum=1, maximum=30, step=1, value=6, label="Inference Steps")
                guidance_scale_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale - high noise stage")
                guidance_scale_2_input = gr.Slider(minimum=0.0, maximum=10.0, step=0.5, value=1, label="Guidance Scale 2 - low noise stage")

            generate_button = gr.Button("Generate Video", variant="primary")
        with gr.Column():
            video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)

    # Component order must match the parameter order of generate_video.
    ui_inputs = [
        input_image_component, prompt_input, steps_slider,
        negative_prompt_input, duration_seconds_input,
        guidance_scale_input, guidance_scale_2_input,
        seed_input, randomize_seed_checkbox
    ]
    # seed_input is also an output so the UI reflects the seed actually used.
    generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])

if __name__ == "__main__":
    demo.queue().launch(mcp_server=True)
kill_bill.jpeg ADDED

Git LFS Details

  • SHA256: d1db15fcc022a6c639d14d4b246c40729af2873ca81d4acf7b48d36d62b8d864
  • Pointer size: 131 Bytes
  • Size of remote file: 240 kB
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ git+https://github.com/linoytsaban/diffusers.git@wan22-loras
2
+
3
+ transformers
4
+ accelerate
5
+ safetensors
6
+ sentencepiece
7
+ peft
8
+ ftfy
9
+ imageio-ffmpeg
10
+ opencv-python
11
+ torchao==0.11.0
wan_i2v_input.JPG ADDED

Git LFS Details

  • SHA256: 077e3d965090c9028c69c00931675f42e1acc815c6eb450ab291b3b72d211a8e
  • Pointer size: 131 Bytes
  • Size of remote file: 251 kB