Spaces:
Paused
Paused
File size: 7,556 Bytes
3ba39a7 6934c92 3ba39a7 64076fa 3ba39a7 136da9a 3ba39a7 5e014a6 3ba39a7 6934c92 c19ca3d a0f4ef6 398c57e c19ca3d 64076fa cecea50 3ba39a7 3b76114 a0f4ef6 c19ca3d a0f4ef6 3ba39a7 ba5d0dc 3ba39a7 a0f4ef6 3ba39a7 3a377bd 3ba39a7 c19ca3d 3ba39a7 3a377bd 3ba39a7 c19ca3d 3ba39a7 c19ca3d 3ba39a7 6934c92 3ba39a7 6934c92 3ba39a7 c19ca3d 3ba39a7 c19ca3d 3ba39a7 c19ca3d 3ba39a7 3aca9f1 3ba39a7 6934c92 c19ca3d | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | import os
import sys
import gc
import tempfile
import random
import numpy as np
import torch
from PIL import Image
#os.system("pip install spaces-0.1.0-py3-none-any.whl moviepy==1.0.3 imageio[ffmpeg]")
import spaces
import gradio as gr
from diffusers import AutoencoderKLWan, WanPipeline, WanImageToVideoPipeline, UniPCMultistepScheduler
from diffusers.utils import export_to_video
from moviepy.editor import VideoFileClip, concatenate_videoclips
# Model: FastWan 2.2 TI2V 5B (full attention) in Diffusers format.
MODEL_ID = "FastVideo/FastWan2.2-TI2V-5B-FullAttn-Diffusers"
# VAE stays in float32 for decode quality; the pipelines themselves run bf16.
vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
# Two pipelines sharing the same VAE: text->video and image->video.
text_to_video_pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
image_to_video_pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
# Give both pipelines the same UniPC scheduler configuration.
# NOTE(review): flow_shift=8.0 presumably tuned for this distilled model — confirm.
for pipe in [text_to_video_pipe, image_to_video_pipe]:
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)
# Output width/height must be multiples of this value.
MOD_VALUE = 32
DEFAULT_H = 896
DEFAULT_W = 896
MAX_SEED = np.iinfo(np.int32).max
FIXED_FPS = 24  # frames-per-second used for every exported clip
MIN_FRAMES_MODEL = 25   # minimum clip length supported by the model
MAX_FRAMES_MODEL = 193  # maximum clip length supported by the model
@spaces.GPU()
def _clean_memory():
    """Free Python garbage and cached GPU memory between generations.

    Decorated with @spaces.GPU() so it runs on the GPU worker where the
    pipelines allocate their CUDA memory.
    """
    gc.collect()
    # FIX: gc.collect() alone never returns GPU memory to the driver;
    # also flush PyTorch's CUDA allocator cache when a GPU is present.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
@spaces.GPU()
def generate_video_gpu(input_files, prompt, height, width, negative_prompt, target_frames, guidance_scale, steps, seed, randomize_seed, progress=gr.Progress(track_tqdm=True)):
    """Generate one clip per input image (image-to-video), or a single
    text-to-video clip when no images are supplied.

    Args:
        input_files: None, one file, or a list of files (filepaths or
            objects exposing ``.name``) pointing to input images.
        prompt / negative_prompt: text conditioning for the pipelines.
        height, width: requested resolution, snapped down to MOD_VALUE.
        target_frames: requested clip length, clamped to the model range.
        guidance_scale, steps: sampler settings forwarded to the pipeline.
        seed, randomize_seed: base seed, or draw a random one when asked.
        progress: Gradio progress tracker, updated once per finished clip.

    Returns:
        (list of mp4 file paths, master seed actually used)
    """
    # Snap the requested resolution to the model's required multiple.
    target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
    target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
    # Keep the frame count within the model's supported limits.
    # FIX: lower bound was hard-coded to 1 even though the model's
    # minimum is MIN_FRAMES_MODEL (25).
    num_frames = min(max(int(target_frames), MIN_FRAMES_MODEL), MAX_FRAMES_MODEL)
    master_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    video_clips_paths = []
    pil_images = []
    if input_files is not None:
        files_list = input_files if isinstance(input_files, list) else [input_files]
        for f in files_list:
            try:
                path = f.name if hasattr(f, "name") else f
                img = Image.open(path).convert("RGB")
                pil_images.append(img)
            except Exception:
                # Best-effort: skip unreadable / non-image files.
                continue
    if len(pil_images) > 0:
        # Image-to-video mode: one clip per image, each seeded from the
        # master seed so the whole batch is reproducible from one number.
        for i, img in enumerate(pil_images):
            _clean_memory()
            local_seed = master_seed + i
            # FIX: manual_seed was called on the torch.Generator class
            # itself (TypeError at runtime); an instance is required.
            generator = torch.Generator().manual_seed(local_seed)
            resized_image = img.resize((target_w, target_h))
            try:
                with torch.inference_mode():
                    output_frames = image_to_video_pipe(
                        image=resized_image,
                        prompt=prompt,
                        negative_prompt=negative_prompt,
                        height=target_h,
                        width=target_w,
                        num_frames=num_frames,
                        guidance_scale=float(guidance_scale),
                        num_inference_steps=int(steps),
                        generator=generator
                    ).frames[0]
                with tempfile.NamedTemporaryFile(suffix=f"_img_{i}.mp4", delete=False) as tmp:
                    export_to_video(output_frames, tmp.name, fps=FIXED_FPS)
                    video_clips_paths.append(tmp.name)
                progress((i + 1) / len(pil_images))
            except Exception:
                # Best-effort: one failed image must not abort the batch.
                continue
    else:
        # Text-to-video mode: a single clip with the requested frame count.
        _clean_memory()
        # FIX: same class-vs-instance Generator bug as above.
        generator = torch.Generator().manual_seed(master_seed)
        with torch.inference_mode():
            output_frames = text_to_video_pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                height=target_h,
                width=target_w,
                num_frames=num_frames,
                guidance_scale=float(guidance_scale),
                num_inference_steps=int(steps),
                generator=generator
            ).frames[0]
        with tempfile.NamedTemporaryFile(suffix="_txt2vid.mp4", delete=False) as tmp:
            export_to_video(output_frames, tmp.name, fps=FIXED_FPS)
            video_clips_paths.append(tmp.name)
        progress(1.0)
    _clean_memory()
    return video_clips_paths, master_seed
@spaces.GPU()
def stitch_videos(video_paths):
    """Concatenate the given clip files into a single mp4.

    Returns None for an empty list, the lone path unchanged for a single
    clip, and — if concatenation fails for any reason — falls back to the
    first clip so the user still gets something.
    """
    if not video_paths:
        return None
    if len(video_paths) == 1:
        return video_paths[0]
    try:
        source_clips = [VideoFileClip(path) for path in video_paths]
        merged = concatenate_videoclips(source_clips, method="compose")
        with tempfile.NamedTemporaryFile(suffix="_final.mp4", delete=False) as handle:
            output_path = handle.name
            merged.write_videofile(output_path, codec="libx264", audio=False, fps=FIXED_FPS, logger=None)
        for clip in source_clips:
            clip.close()
        return output_path
    except Exception:
        # Best-effort fallback: return at least the first generated clip.
        return video_paths[0]
@spaces.GPU()
def main_process(input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed):
    """Run the full pipeline: generate per-input clips, then stitch them.

    Returns (path to the final video, seed actually used).
    """
    generated_clips, effective_seed = generate_video_gpu(
        input_files, prompt, height, width, neg_prompt,
        frames, scale, steps, seed, rand_seed,
    )
    stitched_video = stitch_videos(generated_clips)
    return stitched_video, effective_seed
# ---- Gradio UI ----------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Fast Wan 2.2 - Generador de Video")
    with gr.Row():
        with gr.Column(scale=1):
            # Optional input images; when left empty the app falls back
            # to pure text-to-video generation.
            input_files = gr.File(
                label="Imágenes de Entrada",
                file_count="multiple",
                type="filepath",
                file_types=["image"]
            )
            prompt = gr.Textbox(label="Prompt", value="Cinematic view, realistic lighting, 4k, slow motion", lines=2)
            # Clip length slider bounded by the model's frame limits.
            frames = gr.Slider(
                minimum=MIN_FRAMES_MODEL,
                maximum=MAX_FRAMES_MODEL,
                step=1,
                value=81,
                label="Duración (Frames)",
                info=f"Máximo soportado por el modelo: {MAX_FRAMES_MODEL} frames"
            )
            with gr.Accordion("Configuración Avanzada", open=False):
                neg_prompt = gr.Textbox(label="Prompt Negativo", value="low quality, distortion, text, watermark, blurry, ugly", lines=2)
                seed = gr.Slider(label="Semilla", minimum=0, maximum=MAX_SEED, step=1, value=42)
                rand_seed = gr.Checkbox(label="Semilla Aleatoria", value=True)
                with gr.Row():
                    # step=32 keeps resolutions aligned to MOD_VALUE.
                    height = gr.Slider(minimum=256, maximum=1024, step=32, value=832, label="Altura")
                    width = gr.Slider(minimum=256, maximum=1024, step=32, value=832, label="Anchura")
                steps = gr.Slider(minimum=2, maximum=10, step=1, value=4, label="Pasos")
                scale = gr.Slider(minimum=1.0, maximum=8.0, step=0.1, value=5.0, label="Guidance Scale")
            btn_gen = gr.Button("Generar", variant="primary", size="lg")
        with gr.Column(scale=2):
            output_video = gr.Video(label="Resultado Final", autoplay=True)
            output_seed = gr.Number(label="Semilla Usada")
    # Wire the button to the full generate-and-stitch pipeline.
    btn_gen.click(
        fn=main_process,
        inputs=[input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed],
        outputs=[output_video, output_seed]
    )
# Launch the app with request queueing enabled when run as a script.
if __name__ == "__main__":
    demo.queue().launch()