File size: 7,556 Bytes
3ba39a7
 
 
6934c92
3ba39a7
64076fa
3ba39a7
136da9a
3ba39a7
5e014a6
3ba39a7
 
 
 
 
 
6934c92
c19ca3d
a0f4ef6
398c57e
c19ca3d
 
 
 
 
64076fa
cecea50
3ba39a7
 
3b76114
a0f4ef6
c19ca3d
 
a0f4ef6
3ba39a7
 
 
ba5d0dc
3ba39a7
 
a0f4ef6
 
3ba39a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a377bd
3ba39a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c19ca3d
 
3ba39a7
 
 
3a377bd
3ba39a7
c19ca3d
3ba39a7
 
 
 
 
 
 
 
 
c19ca3d
3ba39a7
 
 
 
 
 
6934c92
3ba39a7
 
6934c92
3ba39a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c19ca3d
3ba39a7
 
 
 
 
c19ca3d
3ba39a7
 
c19ca3d
3ba39a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3aca9f1
3ba39a7
 
 
 
 
 
 
 
6934c92
 
 
c19ca3d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import os
import sys
import gc
import tempfile
import random
import numpy as np
import torch
from PIL import Image

#os.system("pip install spaces-0.1.0-py3-none-any.whl moviepy==1.0.3 imageio[ffmpeg]")

import spaces
import gradio as gr
from diffusers import AutoencoderKLWan, WanPipeline, WanImageToVideoPipeline, UniPCMultistepScheduler
from diffusers.utils import export_to_video
from moviepy.editor import VideoFileClip, concatenate_videoclips

MODEL_ID = "FastVideo/FastWan2.2-TI2V-5B-FullAttn-Diffusers"
# VAE is loaded in float32 while the pipelines below use bfloat16 —
# presumably for decode quality/stability; TODO confirm against model card.
vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)

# Two pipelines sharing one VAE: text-to-video and image-to-video.
text_to_video_pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
image_to_video_pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)

# Swap in the UniPC scheduler on both pipelines (flow_shift=8.0).
for pipe in [text_to_video_pipe, image_to_video_pipe]:
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)

MOD_VALUE = 32          # output H/W must be a multiple of this
DEFAULT_H = 896         # NOTE(review): unused below — the UI sliders default to 832
DEFAULT_W = 896         # NOTE(review): unused below — the UI sliders default to 832
MAX_SEED = np.iinfo(np.int32).max
FIXED_FPS = 24          # fps used both for export and for stitching
MIN_FRAMES_MODEL = 25   # model's minimum clip length (also the slider minimum)
MAX_FRAMES_MODEL = 193  # model's maximum clip length

@spaces.GPU()
def _clean_memory():
    """Release unreferenced Python objects and, when CUDA is present, the
    cached GPU allocator memory.

    Called between generations to keep peak memory down. The original only
    ran ``gc.collect()``; on a GPU worker that leaves PyTorch's cached CUDA
    blocks allocated, so we also empty the CUDA cache when available.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

@spaces.GPU()
def generate_video_gpu(input_files, prompt, height, width, negative_prompt, target_frames, guidance_scale, steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Generate video clips: one per input image (image-to-video mode), or a
    single clip from text alone when no images are provided.

    Args:
        input_files: payload from ``gr.File`` — a list of uploads, a single
            upload, or None for text-to-video mode.
        prompt / negative_prompt: conditioning text for the pipelines.
        height, width: requested output size; snapped down to a multiple of
            ``MOD_VALUE``.
        target_frames: requested clip length; clamped to the model's range.
        guidance_scale, steps: diffusion sampling parameters.
        seed: master seed (used when ``randomize_seed`` is falsy).
        randomize_seed: when truthy, draw a fresh master seed instead.
        progress: Gradio progress tracker, updated once per finished clip.

    Returns:
        tuple: (list of generated .mp4 file paths, master seed actually used)
    """
    # Snap dimensions down to the nearest multiple the model accepts.
    target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
    target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)

    # Clamp the frame count into the model's supported range. The original
    # used a lower bound of 1, inconsistent with MIN_FRAMES_MODEL (which the
    # UI slider already enforces); clamp to the declared model minimum.
    num_frames = min(max(int(target_frames), MIN_FRAMES_MODEL), MAX_FRAMES_MODEL)

    master_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    video_clips_paths = []
    pil_images = []

    # Normalize the upload payload: items may be file-like objects exposing
    # .name or plain path strings; unreadable files are skipped best-effort.
    if input_files is not None:
        files_list = input_files if isinstance(input_files, list) else [input_files]
        for f in files_list:
            try:
                path = f.name if hasattr(f, "name") else f
                pil_images.append(Image.open(path).convert("RGB"))
            except Exception:
                continue

    if len(pil_images) > 0:
        # Image-to-video mode: one clip per image. Each clip derives its seed
        # from the master seed so clips differ but the run is reproducible.
        for i, img in enumerate(pil_images):
            _clean_memory()

            # BUG FIX: the original called torch.Generator.manual_seed(...)
            # on the class itself (TypeError at runtime); a Generator
            # instance must be created first.
            generator = torch.Generator().manual_seed(master_seed + i)

            resized_image = img.resize((target_w, target_h))

            try:
                with torch.inference_mode():
                    output_frames = image_to_video_pipe(
                        image=resized_image, 
                        prompt=prompt, 
                        negative_prompt=negative_prompt,
                        height=target_h, 
                        width=target_w, 
                        num_frames=num_frames,
                        guidance_scale=float(guidance_scale), 
                        num_inference_steps=int(steps),
                        generator=generator
                    ).frames[0]

                with tempfile.NamedTemporaryFile(suffix=f"_img_{i}.mp4", delete=False) as tmp:
                    export_to_video(output_frames, tmp.name, fps=FIXED_FPS)
                    video_clips_paths.append(tmp.name)

                progress((i + 1) / len(pil_images))

            except Exception:
                # Best-effort: one failed clip should not abort the batch.
                continue

    else:
        # Text-to-video mode: generate a single clip with the requested
        # number of frames.
        _clean_memory()

        # BUG FIX: same class-vs-instance Generator issue as above.
        generator = torch.Generator().manual_seed(master_seed)

        with torch.inference_mode():
            output_frames = text_to_video_pipe(
                prompt=prompt, 
                negative_prompt=negative_prompt,
                height=target_h, 
                width=target_w, 
                num_frames=num_frames,
                guidance_scale=float(guidance_scale), 
                num_inference_steps=int(steps),
                generator=generator
            ).frames[0]

        with tempfile.NamedTemporaryFile(suffix="_txt2vid.mp4", delete=False) as tmp:
            export_to_video(output_frames, tmp.name, fps=FIXED_FPS)
            video_clips_paths.append(tmp.name)

        progress(1.0)

    _clean_memory()
    return video_clips_paths, master_seed

@spaces.GPU()
def stitch_videos(video_paths):
    """Concatenate the given .mp4 clips into a single video file.

    Args:
        video_paths: list of clip file paths, possibly empty.

    Returns:
        Path to the stitched video; the single input path when only one clip
        was given; the first path as a best-effort fallback if stitching
        fails; or None for an empty input.
    """
    if not video_paths:
        return None

    if len(video_paths) == 1:
        return video_paths[0]

    clips = []
    final_clip = None
    try:
        clips = [VideoFileClip(p) for p in video_paths]
        final_clip = concatenate_videoclips(clips, method="compose")

        # Reserve a destination path; the file is written after the context
        # closes so moviepy gets an unopened target.
        with tempfile.NamedTemporaryFile(suffix="_final.mp4", delete=False) as final_tmp:
            final_path = final_tmp.name

        final_clip.write_videofile(final_path, codec="libx264", audio=False, fps=FIXED_FPS, logger=None)

        return final_path
    except Exception:
        # Deliberate best-effort fallback: return the first clip unstitched.
        return video_paths[0]
    finally:
        # BUG FIX: the original leaked every opened clip on the exception
        # path and never closed the concatenated clip at all.
        if final_clip is not None:
            try:
                final_clip.close()
            except Exception:
                pass
        for c in clips:
            try:
                c.close()
            except Exception:
                pass

@spaces.GPU()
def main_process(input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed):
    """Run the full pipeline: generate all clips, then stitch them into one
    video.

    Returns:
        tuple: (path of the final stitched video, seed actually used)
    """
    generated_clips, actual_seed = generate_video_gpu(
        input_files, prompt, height, width, neg_prompt,
        frames, scale, steps, seed, rand_seed,
    )
    return stitch_videos(generated_clips), actual_seed

# Gradio UI definition. Widget labels are user-facing Spanish strings and are
# runtime behavior — left untouched.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Fast Wan 2.2 - Generador de Video")

    with gr.Row():
        with gr.Column(scale=1):
            # Optional image uploads; when empty, the app falls back to
            # text-to-video mode.
            input_files = gr.File(
                label="Imágenes de Entrada", 
                file_count="multiple", 
                type="filepath",
                file_types=["image"]
            )
            
            prompt = gr.Textbox(label="Prompt", value="Cinematic view, realistic lighting, 4k, slow motion", lines=2)
            
            # Clip length slider, bounded by the model's frame limits.
            frames = gr.Slider(
                minimum=MIN_FRAMES_MODEL, 
                maximum=MAX_FRAMES_MODEL, 
                step=1, 
                value=81, 
                label="Duración (Frames)", 
                info=f"Máximo soportado por el modelo: {MAX_FRAMES_MODEL} frames"
            )

            with gr.Accordion("Configuración Avanzada", open=False):
                neg_prompt = gr.Textbox(label="Prompt Negativo", value="low quality, distortion, text, watermark, blurry, ugly", lines=2)
                seed = gr.Slider(label="Semilla", minimum=0, maximum=MAX_SEED, step=1, value=42)
                rand_seed = gr.Checkbox(label="Semilla Aleatoria", value=True)
                
                # Step of 32 matches MOD_VALUE, so slider values need no
                # further snapping in practice.
                with gr.Row():
                    height = gr.Slider(minimum=256, maximum=1024, step=32, value=832, label="Altura")
                    width = gr.Slider(minimum=256, maximum=1024, step=32, value=832, label="Anchura")
                
                steps = gr.Slider(minimum=2, maximum=10, step=1, value=4, label="Pasos")
                scale = gr.Slider(minimum=1.0, maximum=8.0, step=0.1, value=5.0, label="Guidance Scale")
            
            btn_gen = gr.Button("Generar", variant="primary", size="lg")
        
        with gr.Column(scale=2):
            output_video = gr.Video(label="Resultado Final", autoplay=True)
            output_seed = gr.Number(label="Semilla Usada")

    # Wire the button to the full generate-then-stitch pipeline.
    btn_gen.click(
        fn=main_process,
        inputs=[input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed],
        outputs=[output_video, output_seed]
    )

# Script entry point: enable request queueing, then serve the app.
if __name__ == "__main__":
    app = demo.queue()
    app.launch()