Update app.py
app.py CHANGED
@@ -1,164 +1,198 @@
-import
-import
-
-from diffusers.utils import export_to_video
-import gradio as gr
+import os
+import sys
+import gc
 import tempfile
+import random
 import numpy as np
+import torch
 from PIL import Image
-
+
+os.system("pip install spaces-0.1.0-py3-none-any.whl moviepy==1.0.3 imageio[ffmpeg]")
+
+import spaces
+import gradio as gr
+from diffusers import AutoencoderKLWan, WanPipeline, WanImageToVideoPipeline, UniPCMultistepScheduler
+from diffusers.utils import export_to_video
+from moviepy.editor import VideoFileClip, concatenate_videoclips
 
 MODEL_ID = "FastVideo/FastWan2.2-TI2V-5B-FullAttn-Diffusers"
 vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
 
-# Initialize pipelines
 text_to_video_pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
 image_to_video_pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
 
 for pipe in [text_to_video_pipe, image_to_video_pipe]:
     pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
-    pipe.to("cuda")
 
-# Constants
 MOD_VALUE = 32
-
-
-NEW_FORMULA_MAX_AREA = 720 * 1024
-SLIDER_MIN_H, SLIDER_MAX_H = 256, 1024
-SLIDER_MIN_W, SLIDER_MAX_W = 256, 1024
+DEFAULT_H = 896
+DEFAULT_W = 896
 MAX_SEED = np.iinfo(np.int32).max
 FIXED_FPS = 24
MIN_FRAMES_MODEL = 25
 MAX_FRAMES_MODEL = 193
 
-def _calculate_new_dimensions_wan(pil_image, mod_val, calculation_max_area,
-                                  min_slider_h, max_slider_h,
-                                  min_slider_w, max_slider_w,
-                                  default_h, default_w):
-    orig_w, orig_h = pil_image.size
-    if orig_w <= 0 or orig_h <= 0:
-        return default_h, default_w
-    aspect_ratio = orig_h / orig_w
-
-    calc_h = round(np.sqrt(calculation_max_area * aspect_ratio))
-    calc_w = round(np.sqrt(calculation_max_area / aspect_ratio))
-    calc_h = max(mod_val, (calc_h // mod_val) * mod_val)
-    calc_w = max(mod_val, (calc_w // mod_val) * mod_val)
-
-    new_h = int(np.clip(calc_h, min_slider_h, (max_slider_h // mod_val) * mod_val))
-    new_w = int(np.clip(calc_w, min_slider_w, (max_slider_w // mod_val) * mod_val))
-
-    return new_h, new_w
-
-def handle_image_upload_for_dims_wan(uploaded_pil_image, current_h_val, current_w_val):
-    if uploaded_pil_image is None:
-        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
-    try:
-        new_h, new_w = _calculate_new_dimensions_wan(
-            uploaded_pil_image, MOD_VALUE, NEW_FORMULA_MAX_AREA,
-            SLIDER_MIN_H, SLIDER_MAX_H, SLIDER_MIN_W, SLIDER_MAX_W,
-            DEFAULT_H_SLIDER_VALUE, DEFAULT_W_SLIDER_VALUE
-        )
-        return gr.update(value=new_h), gr.update(value=new_w)
-    except Exception as e:
-        gr.Warning("Error attempting to calculate new dimensions")
-        return gr.update(value=DEFAULT_H_SLIDER_VALUE), gr.update(value=DEFAULT_W_SLIDER_VALUE)
-
-def get_duration(input_image, prompt, height, width,
-                 negative_prompt, duration_seconds,
-                 guidance_scale, steps,
-                 seed, randomize_seed,
-                 progress):
-    if steps > 4 and duration_seconds > 4:
-        return 90
-    elif steps > 4 or duration_seconds > 4:
-        return 75
-    else:
-        return 60
+@spaces.GPU()
+def _clean_memory():
+    gc.collect()
+    torch.cuda.empty_cache()
 
-@spaces.GPU(duration=get_duration)
-def generate_video(input_image, prompt, height, width, negative_prompt, duration_seconds, guidance_scale, steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
+@spaces.GPU()
+def generate_video_gpu(input_files, prompt, height, width, negative_prompt, target_frames, guidance_scale, steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
     target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
     target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
 
-    num_frames = np.clip(int(round(duration_seconds * FIXED_FPS)), MIN_FRAMES_MODEL, MAX_FRAMES_MODEL)
-
-    current_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
-
-    if input_image is not None:
-        resized_image = input_image.resize((target_w, target_h))
-        with torch.inference_mode():
-            output_frames_list = image_to_video_pipe(
-                image=resized_image, prompt=prompt, negative_prompt=negative_prompt,
-                height=target_h, width=target_w, num_frames=num_frames,
-                guidance_scale=float(guidance_scale), num_inference_steps=int(steps),
-                generator=torch.Generator(device="cuda").manual_seed(current_seed)
-            ).frames[0]
+    # Ensure the requested frame count stays within the model's limits
+    num_frames = min(max(int(target_frames), 1), MAX_FRAMES_MODEL)
+
+    master_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
+
+    video_clips_paths = []
+    pil_images = []
+
+    if input_files is not None:
+        files_list = input_files if isinstance(input_files, list) else [input_files]
+        for f in files_list:
+            try:
+                path = f.name if hasattr(f, "name") else f
+                img = Image.open(path).convert("RGB")
+                pil_images.append(img)
+            except Exception:
+                continue
+
+    # Image-to-video mode: generate one clip per uploaded image
+    if len(pil_images) > 0:
+        for i, img in enumerate(pil_images):
+            _clean_memory()
+
+            local_seed = master_seed + i
+            generator = torch.Generator(device="cuda").manual_seed(local_seed)
+
+            resized_image = img.resize((target_w, target_h))
+
+            try:
+                with torch.inference_mode():
+                    output_frames = image_to_video_pipe(
+                        image=resized_image,
+                        prompt=prompt,
+                        negative_prompt=negative_prompt,
+                        height=target_h,
+                        width=target_w,
+                        num_frames=num_frames,
+                        guidance_scale=float(guidance_scale),
+                        num_inference_steps=int(steps),
+                        generator=generator
+                    ).frames[0]
+
+                with tempfile.NamedTemporaryFile(suffix=f"_img_{i}.mp4", delete=False) as tmp:
+                    export_to_video(output_frames, tmp.name, fps=FIXED_FPS)
+                    video_clips_paths.append(tmp.name)
+
+                progress((i + 1) / len(pil_images))
+
+            except Exception:
+                continue
+
     else:
+        # Text-to-video mode: generate a single clip with the requested number of frames
+        _clean_memory()
+
+        generator = torch.Generator(device="cuda").manual_seed(master_seed)
+
         with torch.inference_mode():
-            output_frames_list = text_to_video_pipe(
-                prompt=prompt,
-                negative_prompt=negative_prompt, height=target_h, width=target_w, num_frames=num_frames,
-                guidance_scale=float(guidance_scale), num_inference_steps=int(steps),
-                generator=torch.Generator(device="cuda").manual_seed(current_seed)
+            output_frames = text_to_video_pipe(
+                prompt=prompt,
+                negative_prompt=negative_prompt,
+                height=target_h,
+                width=target_w,
+                num_frames=num_frames,
+                guidance_scale=float(guidance_scale),
+                num_inference_steps=int(steps),
+                generator=generator
             ).frames[0]
+
+        with tempfile.NamedTemporaryFile(suffix="_txt2vid.mp4", delete=False) as tmp:
+            export_to_video(output_frames, tmp.name, fps=FIXED_FPS)
+            video_clips_paths.append(tmp.name)
+
+        progress(1.0)
 
-
-
-    export_to_video(output_frames_list, video_path, fps=FIXED_FPS)
-    return video_path, current_seed
+    _clean_memory()
+    return video_clips_paths, master_seed
 
-
-
-
+@spaces.GPU()
+def stitch_videos(video_paths):
+    if not video_paths:
+        return None
+
+    if len(video_paths) == 1:
+        return video_paths[0]
+
+    try:
+        clips = [VideoFileClip(p) for p in video_paths]
+        final_clip = concatenate_videoclips(clips, method="compose")
+
+        with tempfile.NamedTemporaryFile(suffix="_final.mp4", delete=False) as final_tmp:
+            final_path = final_tmp.name
+
+        final_clip.write_videofile(final_path, codec="libx264", audio=False, fps=FIXED_FPS, logger=None)
+
+        for c in clips: c.close()
+
+        return final_path
+    except Exception:
+        return video_paths[0]
 
-
-
-
-
-
-
-            with gr.Accordion("Advanced Settings", open=False):
-                negative_prompt_input = gr.Textbox(label="Negative Prompt", value=default_negative_prompt, lines=3)
-                seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42, interactive=True)
-                randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True, interactive=True)
-                with gr.Row():
-                    height_input = gr.Slider(minimum=SLIDER_MIN_H, maximum=SLIDER_MAX_H, step=MOD_VALUE, value=DEFAULT_H_SLIDER_VALUE, label=f"Output Height (multiple of {MOD_VALUE})")
-                    width_input = gr.Slider(minimum=SLIDER_MIN_W, maximum=SLIDER_MAX_W, step=MOD_VALUE, value=DEFAULT_W_SLIDER_VALUE, label=f"Output Width (multiple of {MOD_VALUE})")
-                steps_slider = gr.Slider(minimum=1, maximum=8, step=1, value=4, label="Inference Steps")
-                guidance_scale_input = gr.Slider(minimum=0.0, maximum=5.0, step=0.01, value=0.0, label="Guidance Scale")
-            generate_button = gr.Button("Generate Video", variant="primary")
-        with gr.Column():
-            video_output = gr.Video(label="Generated Video", autoplay=True, interactive=False)
-
-    input_image_component.upload(
-        fn=handle_image_upload_for_dims_wan,
-        inputs=[input_image_component, height_input, width_input],
-        outputs=[height_input, width_input]
-    )
+@spaces.GPU()
+def main_process(input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed):
+    clips, used_seed = generate_video_gpu(input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed)
+    final_video = stitch_videos(clips)
+    return final_video, used_seed
 
-
-
-        inputs=[input_image_component, height_input, width_input],
-        outputs=[height_input, width_input]
-    )
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# Fast Wan 2.2 - Video Generator")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    with gr.Row():
+        with gr.Column(scale=1):
+            input_files = gr.File(
+                label="Input Images",
+                file_count="multiple",
+                type="filepath",
+                file_types=["image"]
+            )
+
+            prompt = gr.Textbox(label="Prompt", value="Cinematic view, realistic lighting, 4k, slow motion", lines=2)
+
+            frames = gr.Slider(
+                minimum=MIN_FRAMES_MODEL,
+                maximum=MAX_FRAMES_MODEL,
+                step=1,
+                value=81,
+                label="Duration (Frames)",
+                info=f"Maximum supported by the model: {MAX_FRAMES_MODEL} frames"
+            )
+
+            with gr.Accordion("Advanced Settings", open=False):
+                neg_prompt = gr.Textbox(label="Negative Prompt", value="low quality, distortion, text, watermark, blurry, ugly", lines=2)
+                seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
+                rand_seed = gr.Checkbox(label="Randomize Seed", value=True)
+
+                with gr.Row():
+                    height = gr.Slider(minimum=256, maximum=1024, step=32, value=832, label="Height")
+                    width = gr.Slider(minimum=256, maximum=1024, step=32, value=832, label="Width")
+
+                steps = gr.Slider(minimum=2, maximum=10, step=1, value=4, label="Steps")
+                scale = gr.Slider(minimum=1.0, maximum=8.0, step=0.1, value=5.0, label="Guidance Scale")
+
+            btn_gen = gr.Button("Generate", variant="primary", size="lg")
 
-
+        with gr.Column(scale=2):
+            output_video = gr.Video(label="Final Result", autoplay=True)
+            output_seed = gr.Number(label="Seed Used")
+
+    btn_gen.click(
+        fn=main_process,
+        inputs=[input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed],
+        outputs=[output_video, output_seed]
     )
 
 if __name__ == "__main__":