File size: 7,556 Bytes
3ba39a7
 
 
6934c92
3ba39a7
64076fa
3ba39a7
136da9a
3ba39a7
5e014a6
3ba39a7
 
 
 
 
 
6934c92
c19ca3d
a0f4ef6
398c57e
c19ca3d
 
 
 
 
64076fa
cecea50
3ba39a7
 
3b76114
a0f4ef6
c19ca3d
 
a0f4ef6
3ba39a7
 
 
ba5d0dc
3ba39a7
 
a0f4ef6
 
3ba39a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a377bd
3ba39a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c19ca3d
 
3ba39a7
 
 
3a377bd
3ba39a7
c19ca3d
3ba39a7
 
 
 
 
 
 
 
 
c19ca3d
3ba39a7
 
 
 
 
 
6934c92
3ba39a7
 
6934c92
3ba39a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c19ca3d
3ba39a7
 
 
 
 
c19ca3d
3ba39a7
 
c19ca3d
3ba39a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3aca9f1
3ba39a7
 
 
 
 
 
 
 
6934c92
 
 
c19ca3d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
import os
import sys
import gc
import tempfile
import random
import numpy as np
import torch
from PIL import Image

#os.system("pip install spaces-0.1.0-py3-none-any.whl moviepy==1.0.3 imageio[ffmpeg]")

import spaces
import gradio as gr
from diffusers import AutoencoderKLWan, WanPipeline, WanImageToVideoPipeline, UniPCMultistepScheduler
from diffusers.utils import export_to_video
from moviepy.editor import VideoFileClip, concatenate_videoclips

MODEL_ID = "FastVideo/FastWan2.2-TI2V-5B-FullAttn-Diffusers"
# VAE is loaded in float32 while the pipelines below use bfloat16 —
# presumably for decode quality/stability; TODO confirm against model card.
vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)

# Two pipelines sharing one VAE: text-to-video and image-to-video.
text_to_video_pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
image_to_video_pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)

# Swap in the UniPC scheduler on both pipelines (flow_shift=8.0).
for pipe in [text_to_video_pipe, image_to_video_pipe]:
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)

MOD_VALUE = 32          # output H/W must be a multiple of this
DEFAULT_H = 896         # NOTE(review): unused below — the UI sliders default to 832
DEFAULT_W = 896         # NOTE(review): unused below — the UI sliders default to 832
MAX_SEED = np.iinfo(np.int32).max
FIXED_FPS = 24          # fps used both for export and for stitching
MIN_FRAMES_MODEL = 25   # model's minimum clip length (also the slider minimum)
MAX_FRAMES_MODEL = 193  # model's maximum clip length

@spaces.GPU()
def _clean_memory():
    """Release unreferenced Python objects and, when CUDA is present, the
    cached GPU allocator memory.

    Called between generations to keep peak memory down. The original only
    ran ``gc.collect()``; on a GPU worker that leaves PyTorch's cached CUDA
    blocks allocated, so we also empty the CUDA cache when available.
    """
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

@spaces.GPU()
def generate_video_gpu(input_files, prompt, height, width, negative_prompt, target_frames, guidance_scale, steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Generate video clips: one per input image (image-to-video mode), or a
    single clip from text alone when no images are provided.

    Args:
        input_files: payload from ``gr.File`` — a list of uploads, a single
            upload, or None for text-to-video mode.
        prompt / negative_prompt: conditioning text for the pipelines.
        height, width: requested output size; snapped down to a multiple of
            ``MOD_VALUE``.
        target_frames: requested clip length; clamped to the model's range.
        guidance_scale, steps: diffusion sampling parameters.
        seed: master seed (used when ``randomize_seed`` is falsy).
        randomize_seed: when truthy, draw a fresh master seed instead.
        progress: Gradio progress tracker, updated once per finished clip.

    Returns:
        tuple: (list of generated .mp4 file paths, master seed actually used)
    """
    # Snap dimensions down to the nearest multiple the model accepts.
    target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
    target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)

    # Clamp the frame count into the model's supported range. The original
    # used a lower bound of 1, inconsistent with MIN_FRAMES_MODEL (which the
    # UI slider already enforces); clamp to the declared model minimum.
    num_frames = min(max(int(target_frames), MIN_FRAMES_MODEL), MAX_FRAMES_MODEL)

    master_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    video_clips_paths = []
    pil_images = []

    # Normalize the upload payload: items may be file-like objects exposing
    # .name or plain path strings; unreadable files are skipped best-effort.
    if input_files is not None:
        files_list = input_files if isinstance(input_files, list) else [input_files]
        for f in files_list:
            try:
                path = f.name if hasattr(f, "name") else f
                pil_images.append(Image.open(path).convert("RGB"))
            except Exception:
                continue

    if len(pil_images) > 0:
        # Image-to-video mode: one clip per image. Each clip derives its seed
        # from the master seed so clips differ but the run is reproducible.
        for i, img in enumerate(pil_images):
            _clean_memory()

            # BUG FIX: the original called torch.Generator.manual_seed(...)
            # on the class itself (TypeError at runtime); a Generator
            # instance must be created first.
            generator = torch.Generator().manual_seed(master_seed + i)

            resized_image = img.resize((target_w, target_h))

            try:
                with torch.inference_mode():
                    output_frames = image_to_video_pipe(
                        image=resized_image, 
                        prompt=prompt, 
                        negative_prompt=negative_prompt,
                        height=target_h, 
                        width=target_w, 
                        num_frames=num_frames,
                        guidance_scale=float(guidance_scale), 
                        num_inference_steps=int(steps),
                        generator=generator
                    ).frames[0]

                with tempfile.NamedTemporaryFile(suffix=f"_img_{i}.mp4", delete=False) as tmp:
                    export_to_video(output_frames, tmp.name, fps=FIXED_FPS)
                    video_clips_paths.append(tmp.name)

                progress((i + 1) / len(pil_images))

            except Exception:
                # Best-effort: one failed clip should not abort the batch.
                continue

    else:
        # Text-to-video mode: generate a single clip with the requested
        # number of frames.
        _clean_memory()

        # BUG FIX: same class-vs-instance Generator issue as above.
        generator = torch.Generator().manual_seed(master_seed)

        with torch.inference_mode():
            output_frames = text_to_video_pipe(
                prompt=prompt, 
                negative_prompt=negative_prompt,
                height=target_h, 
                width=target_w, 
                num_frames=num_frames,
                guidance_scale=float(guidance_scale), 
                num_inference_steps=int(steps),
                generator=generator
            ).frames[0]

        with tempfile.NamedTemporaryFile(suffix="_txt2vid.mp4", delete=False) as tmp:
            export_to_video(output_frames, tmp.name, fps=FIXED_FPS)
            video_clips_paths.append(tmp.name)

        progress(1.0)

    _clean_memory()
    return video_clips_paths, master_seed

@spaces.GPU()
def stitch_videos(video_paths):
    """Concatenate the given .mp4 clips into a single video file.

    Args:
        video_paths: list of clip file paths, possibly empty.

    Returns:
        Path to the stitched video; the single input path when only one clip
        was given; the first path as a best-effort fallback if stitching
        fails; or None for an empty input.
    """
    if not video_paths:
        return None

    if len(video_paths) == 1:
        return video_paths[0]

    clips = []
    final_clip = None
    try:
        clips = [VideoFileClip(p) for p in video_paths]
        final_clip = concatenate_videoclips(clips, method="compose")

        # Reserve a destination path; the file is written after the context
        # closes so moviepy gets an unopened target.
        with tempfile.NamedTemporaryFile(suffix="_final.mp4", delete=False) as final_tmp:
            final_path = final_tmp.name

        final_clip.write_videofile(final_path, codec="libx264", audio=False, fps=FIXED_FPS, logger=None)

        return final_path
    except Exception:
        # Deliberate best-effort fallback: return the first clip unstitched.
        return video_paths[0]
    finally:
        # BUG FIX: the original leaked every opened clip on the exception
        # path and never closed the concatenated clip at all.
        if final_clip is not None:
            try:
                final_clip.close()
            except Exception:
                pass
        for c in clips:
            try:
                c.close()
            except Exception:
                pass

@spaces.GPU()
def main_process(input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed):
    """Run the full pipeline: generate all clips, then stitch them into one
    video.

    Returns:
        tuple: (path of the final stitched video, seed actually used)
    """
    generated_clips, actual_seed = generate_video_gpu(
        input_files, prompt, height, width, neg_prompt,
        frames, scale, steps, seed, rand_seed,
    )
    return stitch_videos(generated_clips), actual_seed

# Gradio UI definition. Widget labels are user-facing Spanish strings and are
# runtime behavior — left untouched.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Fast Wan 2.2 - Generador de Video")

    with gr.Row():
        with gr.Column(scale=1):
            # Optional image uploads; when empty, the app falls back to
            # text-to-video mode.
            input_files = gr.File(
                label="Imágenes de Entrada", 
                file_count="multiple", 
                type="filepath",
                file_types=["image"]
            )
            
            prompt = gr.Textbox(label="Prompt", value="Cinematic view, realistic lighting, 4k, slow motion", lines=2)
            
            # Clip length slider, bounded by the model's frame limits.
            frames = gr.Slider(
                minimum=MIN_FRAMES_MODEL, 
                maximum=MAX_FRAMES_MODEL, 
                step=1, 
                value=81, 
                label="Duración (Frames)", 
                info=f"Máximo soportado por el modelo: {MAX_FRAMES_MODEL} frames"
            )

            with gr.Accordion("Configuración Avanzada", open=False):
                neg_prompt = gr.Textbox(label="Prompt Negativo", value="low quality, distortion, text, watermark, blurry, ugly", lines=2)
                seed = gr.Slider(label="Semilla", minimum=0, maximum=MAX_SEED, step=1, value=42)
                rand_seed = gr.Checkbox(label="Semilla Aleatoria", value=True)
                
                # Step of 32 matches MOD_VALUE, so slider values need no
                # further snapping in practice.
                with gr.Row():
                    height = gr.Slider(minimum=256, maximum=1024, step=32, value=832, label="Altura")
                    width = gr.Slider(minimum=256, maximum=1024, step=32, value=832, label="Anchura")
                
                steps = gr.Slider(minimum=2, maximum=10, step=1, value=4, label="Pasos")
                scale = gr.Slider(minimum=1.0, maximum=8.0, step=0.1, value=5.0, label="Guidance Scale")
            
            btn_gen = gr.Button("Generar", variant="primary", size="lg")
        
        with gr.Column(scale=2):
            output_video = gr.Video(label="Resultado Final", autoplay=True)
            output_seed = gr.Number(label="Semilla Usada")

    # Wire the button to the full generate-then-stitch pipeline.
    btn_gen.click(
        fn=main_process,
        inputs=[input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed],
        outputs=[output_video, output_seed]
    )

# Script entry point: enable request queueing, then serve the app.
if __name__ == "__main__":
    app = demo.queue()
    app.launch()