Ksjsjjdj committed
Commit 3ba39a7 (verified)
1 Parent(s): f70992d

Update app.py

Files changed (1): app.py +163 -129
app.py CHANGED
@@ -1,164 +1,198 @@
- import spaces
- import torch
- from diffusers import AutoencoderKLWan, WanPipeline, WanImageToVideoPipeline, UniPCMultistepScheduler
- from diffusers.utils import export_to_video
- import gradio as gr
  import tempfile
  import numpy as np
  from PIL import Image
- import random

  MODEL_ID = "FastVideo/FastWan2.2-TI2V-5B-FullAttn-Diffusers"
  vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)

- # Initialize pipelines
  text_to_video_pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
  image_to_video_pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)

  for pipe in [text_to_video_pipe, image_to_video_pipe]:
      pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
-     pipe.to("cuda")

- # Constants
  MOD_VALUE = 32
- DEFAULT_H_SLIDER_VALUE = 896
- DEFAULT_W_SLIDER_VALUE = 896
- NEW_FORMULA_MAX_AREA = 720 * 1024
- SLIDER_MIN_H, SLIDER_MAX_H = 256, 1024
- SLIDER_MIN_W, SLIDER_MAX_W = 256, 1024
  MAX_SEED = np.iinfo(np.int32).max
  FIXED_FPS = 24
  MIN_FRAMES_MODEL = 25
  MAX_FRAMES_MODEL = 193

- default_prompt_i2v = "make this image come alive, cinematic motion, smooth animation"
- default_negative_prompt = "Bright tones, overexposed, static, blurred details, subtitles, style, works, paintings, images, static, overall gray, worst quality, low quality, JPEG compression residue, ugly, incomplete, extra fingers, poorly drawn hands, poorly drawn faces, deformed, disfigured, misshapen limbs, fused fingers, still picture, messy background, three legs, many people in the background, walking backwards, watermark, text, signature"
-
- def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area, min_slider_h, max_slider_h, min_slider_w, max_slider_w, default_h, default_w):
-     orig_w, orig_h = pil_image.size
-     if orig_w <= 0 or orig_h <= 0:
-         return default_h, default_w
-     aspect_ratio = orig_h / orig_w
-
-     calc_h = round(np.sqrt(calculation_max_area * aspect_ratio))
-     calc_w = round(np.sqrt(calculation_max_area / aspect_ratio))
-     calc_h = max(mod_val, (calc_h // mod_val) * mod_val)
-     calc_w = max(mod_val, (calc_w // mod_val) * mod_val)
-
-     new_h = int(np.clip(calc_h, min_slider_h, (max_slider_h // mod_val) * mod_val))
-     new_w = int(np.clip(calc_w, min_slider_w, (max_slider_w // mod_val) * mod_val))
-
-     return new_h, new_w
-
- def handle_image_upload_for_dims_wan(uploaded_pil_image, current_h_val, current_w_val):
-     if uploaded_pil_image is None:
-         return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
-     try:
-         new_h, new_w = _calculate_new_dimensions_wan(
-             uploaded_pil_image, MOD_VALUE, NEW_FORMULA_MAX_AREA,
-             SLIDER_MIN_H, SLIDER_MAX_H, SLIDER_MIN_W, SLIDER_MAX_W,
-             DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE
-         )
-         return gr.update(value=new_h), gr.update(value=new_w)
-     except Exception as e:
-         gr.Warning("Error attempting to calculate new dimensions")
-         return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
-
- def get_duration(input_image, prompt, height, width,
-                  negative_prompt, duration_seconds,
-                  guidance_scale, steps,
-                  seed, randomize_seed,
-                  progress):
-     if steps > 4 and duration_seconds > 4:
-         return 90
-     elif steps > 4 or duration_seconds > 4:
-         return 75
-     else:
-         return 60
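
The removed `get_duration` helper above feeds ZeroGPU's dynamic-duration hook, used by the decorator just below: `spaces.GPU` accepts either a fixed number of seconds or a callable that receives the same arguments as the decorated function and returns the seconds of GPU time to reserve for that call. A minimal sketch of the two forms (the names `_budget`, `sample_fixed`, and `sample` are illustrative, not from this file):

    @spaces.GPU(duration=120)      # fixed form: always reserve 120 seconds
    def sample_fixed(prompt, steps):
        ...

    def _budget(prompt, steps):
        # same signature as the decorated function; returns seconds to reserve
        return 90 if steps > 4 else 60

    @spaces.GPU(duration=_budget)  # dynamic form, as in the removed code
    def sample(prompt, steps):
        ...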
 
- @spaces.GPU(duration=get_duration)
- def generate_video(input_image, prompt, height, width, negative_prompt=default_negative_prompt, duration_seconds=2, guidance_scale=0, steps=4, seed=44, randomize_seed=False, progress=gr.Progress(track_tqdm=True)):
      target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
      target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
-     num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
-
-     current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
-
-     if input_image is not None:
-         resized_image = input_image.resize((target_w, target_h))
-         with torch.inference_mode():
-             output_frames_list = image_to_video_pipe(
-                 image=resized_image, prompt=prompt, negative_prompt=negative_prompt,
-                 height=target_h, width=target_w, num_frames=num_frames,
-                 guidance_scale=float(guidance_scale), num_inference_steps=int(steps),
-                 generator=torch.Generator(device="cuda").manual_seed(current_seed)
-             ).frames[0]
      else:
          with torch.inference_mode():
-             output_frames_list = text_to_video_pipe(
-                 prompt=prompt, negative_prompt=negative_prompt,
-                 height=target_h, width=target_w, num_frames=num_frames,
-                 guidance_scale=float(guidance_scale), num_inference_steps=int(steps),
-                 generator=torch.Generator(device="cuda").manual_seed(current_seed)
              ).frames[0]

-     with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmpfile:
-         video_path = tmpfile.name
-     export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
-     return video_path, current_seed
 
- with gr.Blocks() as demo:
-     gr.Markdown("# Fast Wan 2.2 TI2V 5B Demo")
-     gr.Markdown("""This demo uses [FastWan2.2-TI2V-5B](https://huggingface.co/FastVideo/FastWan2.2-TI2V-5B-FullAttn-Diffusers), which is fine-tuned with the sparse-distill method, allowing Wan to generate high-quality videos in 3-5 steps.""")
-
-     with gr.Row():
-         with gr.Column():
-             input_image_component = gr.Image(type="pil", label="Input Image (optional, auto-resized to target H/W)")
-             prompt_input = gr.Textbox(label="Prompt", value=default_prompt_i2v)
-             duration_seconds_input = gr.Slider(minimum=round(MIN_FRAMES_MODEL/FIXED_FPS,1), maximum=round(MAX_FRAMES_MODEL/FIXED_FPS,1), step=0.1, value=2, label="Duration (seconds)", info=f"Clamped to model's {MIN_FRAMES_MODEL}-{MAX_FRAMES_MODEL} frames at {FIXED_FPS}fps.")
-
-             with gr.Accordion("Advanced Settings", open=False):
-                 negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
-                 seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
-                 randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
-                 with gr.Row():
-                     height_input = gr.Slider(minimum=SLIDER_MIN_H, maximum=SLIDER_MAX_H, step=MOD_VALUE, value=DEFAULT_H_SLIDER_VALUE, label=f"Output Height (multiple of {MOD_VALUE})")
-                     width_input = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W_SLIDER_VALUE, label=f"Output Width (multiple of {MOD_VALUE})")
-                 steps_slider = gr.Slider(minimum=1, maximum=8, step=1, value=4, label="Inference Steps")
-                 guidance_scale_input = gr.Slider(minimum=0.0, maximum=5.0, step=0.01, value=0.0, label="Guidance Scale")
-             generate_button = gr.Button("Generate Video", variant="primary")
-         with gr.Column():
-             video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)
-
-     input_image_component.upload(
-         fn=handle_image_upload_for_dims_wan,
-         inputs=[input_image_component, height_input, width_input],
-         outputs=[height_input, width_input]
-     )

-     input_image_component.clear(
-         fn=handle_image_upload_for_dims_wan,
-         inputs=[input_image_component, height_input, width_input],
-         outputs=[height_input, width_input]
-     )

-     ui_inputs = [
-         input_image_component, prompt_input, height_input, width_input,
-         negative_prompt_input, duration_seconds_input,
-         guidance_scale_input, steps_slider, seed_input, randomize_seed_checkbox
-     ]
-     generate_button.click(fn=generate_video, inputs=ui_inputs, outputs=[video_output, seed_input])
-
-     gr.Examples(
-         examples=[
-             [None, "A person eating spaghetti", 1024, 720],
-             ["cat.png", "The cat removes the glasses from its eyes.", 1088, 800],
-             [None, "a penguin playfully dancing in the snow, Antarctica", 1024, 720],
-             ["peng.png", "a penguin running towards camera joyfully, Antarctica", 896, 512],
-         ],
-         inputs=[input_image_component, prompt_input, height_input, width_input], outputs=[video_output, seed_input], fn=generate_video, cache_examples="lazy"
      )

  if __name__ == "__main__":
 
+ import os
+ import sys
+ import gc
  import tempfile
+ import random
  import numpy as np
+ import torch
  from PIL import Image
+
+ os.system("pip install spaces-0.1.0-py3-none-any.whl moviepy==1.0.3 imageio[ffmpeg]")
+
+ import spaces
+ import gradio as gr
+ from diffusers import AutoencoderKLWan, WanPipeline, WanImageToVideoPipeline, UniPCMultistepScheduler
+ from diffusers.utils import export_to_video
+ from moviepy.editor import VideoFileClip, concatenate_videoclips
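
A side note on the `os.system("pip install ...")` call: it runs on every startup and its exit status is ignored. A common alternative is a guarded install that only runs when the package is missing and raises if pip fails; a minimal sketch using only the standard library (the package pins are copied from the line above, the rest is illustrative):

    import importlib.util
    import subprocess
    import sys

    # Install moviepy only if it is not already importable; check_call raises on failure.
    if importlib.util.find_spec("moviepy") is None:
        subprocess.check_call([sys.executable, "-m", "pip",
                               "install", "moviepy==1.0.3", "imageio[ffmpeg]"])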
 
  MODEL_ID = "FastVideo/FastWan2.2-TI2V-5B-FullAttn-Diffusers"
  vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)

  text_to_video_pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
  image_to_video_pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)

  for pipe in [text_to_video_pipe, image_to_video_pipe]:
      pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)

  MOD_VALUE = 32
+ DEFAULT_H = 896
+ DEFAULT_W = 896
  MAX_SEED = np.iinfo(np.int32).max
  FIXED_FPS = 24
  MIN_FRAMES_MODEL = 25
  MAX_FRAMES_MODEL = 193

+ @spaces.GPU()
+ def _clean_memory():
+     gc.collect()
+     torch.cuda.empty_cache()
 
+ @spaces.GPU()
+ def generate_video_gpu(input_files, prompt, height, width, negative_prompt, target_frames, guidance_scale, steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
      target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
      target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
+
+     # Keep the frame count within the model's limits
+     num_frames = min(max(int(target_frames), 1), MAX_FRAMES_MODEL)
+
+     master_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
+
+     video_clips_paths = []
+     pil_images = []
+
+     if input_files is not None:
+         files_list = input_files if isinstance(input_files, list) else [input_files]
+         for f in files_list:
+             try:
+                 path = f.name if hasattr(f, "name") else f
+                 img = Image.open(path).convert("RGB")
+                 pil_images.append(img)
+             except Exception:
+                 continue
+
+     if len(pil_images) > 0:
+         for i, img in enumerate(pil_images):
+             _clean_memory()
+
+             local_seed = master_seed + i
+             generator = torch.Generator(device="cuda").manual_seed(local_seed)
+
+             resized_image = img.resize((target_w, target_h))
+
+             try:
+                 with torch.inference_mode():
+                     output_frames = image_to_video_pipe(
+                         image=resized_image,
+                         prompt=prompt,
+                         negative_prompt=negative_prompt,
+                         height=target_h,
+                         width=target_w,
+                         num_frames=num_frames,
+                         guidance_scale=float(guidance_scale),
+                         num_inference_steps=int(steps),
+                         generator=generator
+                     ).frames[0]
+
+                 with tempfile.NamedTemporaryFile(suffix=f"_img_{i}.mp4", delete=False) as tmp:
+                     export_to_video(output_frames, tmp.name, fps=FIXED_FPS)
+                     video_clips_paths.append(tmp.name)
+
+                 progress((i + 1) / len(pil_images))
+
+             except Exception:
+                 continue
      else:
+         # Text-to-video mode: generate a single clip with the requested frame count
+         _clean_memory()
+
+         generator = torch.Generator(device="cuda").manual_seed(master_seed)
+
          with torch.inference_mode():
+             output_frames = text_to_video_pipe(
+                 prompt=prompt,
+                 negative_prompt=negative_prompt,
+                 height=target_h,
+                 width=target_w,
+                 num_frames=num_frames,
+                 guidance_scale=float(guidance_scale),
+                 num_inference_steps=int(steps),
+                 generator=generator
              ).frames[0]
+
+         with tempfile.NamedTemporaryFile(suffix="_txt2vid.mp4", delete=False) as tmp:
+             export_to_video(output_frames, tmp.name, fps=FIXED_FPS)
+             video_clips_paths.append(tmp.name)
+
+         progress(1.0)

+     _clean_memory()
+     return video_clips_paths, master_seed
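
A quick check of the arithmetic in `generate_video_gpu` (values chosen for illustration): the snapping expression keeps dimensions a multiple of `MOD_VALUE` without dropping below it, and at `FIXED_FPS = 24` the UI's default of 81 frames is about 3.4 seconds of video.

    MOD_VALUE, FIXED_FPS = 32, 24
    height = 900
    target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
    print(target_h)        # 896: 900 snapped down to the nearest multiple of 32
    print(81 / FIXED_FPS)  # 3.375: the default 81 frames plays for ~3.4 s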
 
+ @spaces.GPU()
+ def stitch_videos(video_paths):
+     if not video_paths:
+         return None
+
+     if len(video_paths) == 1:
+         return video_paths[0]
+
+     try:
+         clips = [VideoFileClip(p) for p in video_paths]
+         final_clip = concatenate_videoclips(clips, method="compose")
+
+         with tempfile.NamedTemporaryFile(suffix="_final.mp4", delete=False) as final_tmp:
+             final_path = final_tmp.name
+
+         final_clip.write_videofile(final_path, codec="libx264", audio=False, fps=FIXED_FPS, logger=None)
+
+         for c in clips:
+             c.close()
+
+         return final_path
+     except Exception:
+         return video_paths[0]
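
On the `method="compose"` argument: in moviepy 1.x, `concatenate_videoclips` defaults to `method="chain"`, which is cheaper but assumes every clip shares one resolution, while `"compose"` renders each clip onto a canvas sized to the largest clip and so tolerates mismatches. All clips here are generated at the same `target_w` x `target_h`, so either setting should work; a standalone usage sketch (the file names are placeholders):

    from moviepy.editor import VideoFileClip, concatenate_videoclips

    clips = [VideoFileClip(p) for p in ["clip_a.mp4", "clip_b.mp4"]]  # placeholders
    final = concatenate_videoclips(clips, method="compose")
    final.write_videofile("joined.mp4", codec="libx264", audio=False, fps=24, logger=None)
    for c in clips:
        c.close()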
 
+ @spaces.GPU()
+ def main_process(input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed):
+     clips, used_seed = generate_video_gpu(input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed)
+     final_video = stitch_videos(clips)
+     return final_video, used_seed
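
One behavioral difference worth flagging: the previous version moved both pipelines to CUDA at import time (`pipe.to("cuda")`), whereas this version only builds CUDA generators inside the `@spaces.GPU()` functions. If the pipelines are still on CPU when `generate_video_gpu` runs, a guarded move at the start of the GPU call is the usual remedy; a sketch (the helper name `_ensure_on_cuda` is hypothetical, the pipeline names come from the module above):

    def _ensure_on_cuda():
        # Hypothetical helper: move both pipelines to the GPU on first use.
        for p in (text_to_video_pipe, image_to_video_pipe):
            if p.device.type != "cuda":
                p.to("cuda")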
 
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# Fast Wan 2.2 - Video Generator")
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             input_files = gr.File(
+                 label="Input Images",
+                 file_count="multiple",
+                 type="filepath",
+                 file_types=["image"]
+             )
+
+             prompt = gr.Textbox(label="Prompt", value="Cinematic view, realistic lighting, 4k, slow motion", lines=2)
+
+             frames = gr.Slider(
+                 minimum=MIN_FRAMES_MODEL,
+                 maximum=MAX_FRAMES_MODEL,
+                 step=1,
+                 value=81,
+                 label="Duration (Frames)",
+                 info=f"Maximum supported by the model: {MAX_FRAMES_MODEL} frames"
+             )
+
+             with gr.Accordion("Advanced Settings", open=False):
+                 neg_prompt = gr.Textbox(label="Negative Prompt", value="low quality, distortion, text, watermark, blurry, ugly", lines=2)
+                 seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
+                 rand_seed = gr.Checkbox(label="Randomize Seed", value=True)
+
+                 with gr.Row():
+                     height = gr.Slider(minimum=256, maximum=1024, step=32, value=832, label="Height")
+                     width = gr.Slider(minimum=256, maximum=1024, step=32, value=832, label="Width")
+
+                 steps = gr.Slider(minimum=2, maximum=10, step=1, value=4, label="Steps")
+                 scale = gr.Slider(minimum=1.0, maximum=8.0, step=0.1, value=5.0, label="Guidance Scale")
+
+             btn_gen = gr.Button("Generate", variant="primary", size="lg")

+         with gr.Column(scale=2):
+             output_video = gr.Video(label="Final Result", autoplay=True)
+             output_seed = gr.Number(label="Seed Used")
+
+     btn_gen.click(
+         fn=main_process,
+         inputs=[input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed],
+         outputs=[output_video, output_seed]
      )

  if __name__ == "__main__":
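
The hunk ends at the `if __name__ == "__main__":` guard, so its body falls outside the diff shown here. In a Gradio Space this guard conventionally just launches the interface; the usual form (an assumption, not taken from this commit) is:

    if __name__ == "__main__":
        demo.launch()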