# Hugging Face Spaces app (Space status at time of capture: "Paused")
# Standard library
import gc
import os
import random
import sys
import tempfile

#os.system("pip install spaces-0.1.0-py3-none-any.whl moviepy==1.0.3 imageio[ffmpeg]")

# Third-party
import gradio as gr
import numpy as np
import spaces
import torch
from diffusers import AutoencoderKLWan, UniPCMultistepScheduler, WanImageToVideoPipeline, WanPipeline
from diffusers.utils import export_to_video
from moviepy.editor import VideoFileClip, concatenate_videoclips
from PIL import Image
# --- Model setup (runs once at import time) ---
MODEL_ID = "FastVideo/FastWan2.2-TI2V-5B-FullAttn-Diffusers"

# VAE in float32 for decode stability; the denoising pipelines in bfloat16.
vae = AutoencoderKLWan.from_pretrained(MODEL_ID, subfolder="vae", torch_dtype=torch.float32)
text_to_video_pipe = WanPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)
image_to_video_pipe = WanImageToVideoPipeline.from_pretrained(MODEL_ID, vae=vae, torch_dtype=torch.bfloat16)

# Both pipelines use the same UniPC scheduler configuration (flow_shift=8.0).
for pipe in (text_to_video_pipe, image_to_video_pipe):
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config, flow_shift=8.0)

# --- Generation constants ---
MOD_VALUE = 32                        # output height/width must be a multiple of this
DEFAULT_H = 896
DEFAULT_W = 896
MAX_SEED = np.iinfo(np.int32).max     # upper bound for user-visible seeds
FIXED_FPS = 24                        # frame rate used for every exported clip
MIN_FRAMES_MODEL = 25                 # model's supported frame-count range
MAX_FRAMES_MODEL = 193
| def _clean_memory(): | |
| gc.collect() | |
@spaces.GPU
def generate_video_gpu(input_files, prompt, height, width, negative_prompt, target_frames,
                       guidance_scale, steps, seed, randomize_seed,
                       progress=gr.Progress(track_tqdm=True)):
    """Generate one or more video clips with the Wan 2.2 pipelines.

    When ``input_files`` contains readable images, one image-to-video clip
    is generated per image (each seeded with ``master_seed + index`` so the
    batch is reproducible but the clips differ). Otherwise a single
    text-to-video clip is generated.

    Args:
        input_files: list of uploaded files (or a single file) or None.
        prompt / negative_prompt: text conditioning.
        height / width: requested dimensions; snapped down to MOD_VALUE.
        target_frames: requested clip length, clamped to the model's range.
        guidance_scale / steps: sampler settings.
        seed / randomize_seed: explicit seed, or pick a random one.
        progress: Gradio progress tracker (default follows Gradio convention).

    Returns:
        tuple[list[str], int]: paths of the exported .mp4 clips and the
        master seed actually used.
    """
    # Snap dimensions down to the model's required multiple (never below it).
    target_h = max(MOD_VALUE, (int(height) // MOD_VALUE) * MOD_VALUE)
    target_w = max(MOD_VALUE, (int(width) // MOD_VALUE) * MOD_VALUE)
    # Clamp frames to the model's supported range. BUG FIX: the lower bound
    # was 1, contradicting MIN_FRAMES_MODEL and the stated intent.
    num_frames = min(max(int(target_frames), MIN_FRAMES_MODEL), MAX_FRAMES_MODEL)
    master_seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

    video_clips_paths = []
    pil_images = []
    if input_files is not None:
        files_list = input_files if isinstance(input_files, list) else [input_files]
        for f in files_list:
            try:
                path = f.name if hasattr(f, "name") else f
                pil_images.append(Image.open(path).convert("RGB"))
            except Exception:
                # Skip anything that is not a readable image.
                continue

    if pil_images:
        # Image-to-video mode: one clip per input image.
        for i, img in enumerate(pil_images):
            _clean_memory()
            # BUG FIX: torch.Generator must be instantiated before seeding;
            # the original called manual_seed unbound on the class.
            generator = torch.Generator().manual_seed(master_seed + i)
            resized_image = img.resize((target_w, target_h))
            try:
                with torch.inference_mode():
                    output_frames = image_to_video_pipe(
                        image=resized_image,
                        prompt=prompt,
                        negative_prompt=negative_prompt,
                        height=target_h,
                        width=target_w,
                        num_frames=num_frames,
                        guidance_scale=float(guidance_scale),
                        num_inference_steps=int(steps),
                        generator=generator,
                    ).frames[0]
                with tempfile.NamedTemporaryFile(suffix=f"_img_{i}.mp4", delete=False) as tmp:
                    export_to_video(output_frames, tmp.name, fps=FIXED_FPS)
                    video_clips_paths.append(tmp.name)
                progress((i + 1) / len(pil_images))
            except Exception:
                # Best effort: one failed clip must not abort the whole batch.
                continue
    else:
        # Text-to-video mode: a single clip with the requested frame count.
        _clean_memory()
        generator = torch.Generator().manual_seed(master_seed)
        with torch.inference_mode():
            output_frames = text_to_video_pipe(
                prompt=prompt,
                negative_prompt=negative_prompt,
                height=target_h,
                width=target_w,
                num_frames=num_frames,
                guidance_scale=float(guidance_scale),
                num_inference_steps=int(steps),
                generator=generator,
            ).frames[0]
        with tempfile.NamedTemporaryFile(suffix="_txt2vid.mp4", delete=False) as tmp:
            export_to_video(output_frames, tmp.name, fps=FIXED_FPS)
            video_clips_paths.append(tmp.name)
        progress(1.0)

    _clean_memory()
    return video_clips_paths, master_seed
def stitch_videos(video_paths):
    """Concatenate the given clips into a single mp4 and return its path.

    Returns None for an empty/None list and the sole path for a single
    clip. On any concatenation/encoding failure, falls back to the first
    clip instead of failing the UI.
    """
    if not video_paths:
        return None
    if len(video_paths) == 1:
        return video_paths[0]
    clips = []
    try:
        clips = [VideoFileClip(p) for p in video_paths]
        final_clip = concatenate_videoclips(clips, method="compose")
        with tempfile.NamedTemporaryFile(suffix="_final.mp4", delete=False) as final_tmp:
            final_path = final_tmp.name
        final_clip.write_videofile(final_path, codec="libx264", audio=False, fps=FIXED_FPS, logger=None)
        return final_path
    except Exception:
        # Best effort: return something playable rather than nothing.
        return video_paths[0]
    finally:
        # BUG FIX: the original only closed clips on success, leaking
        # open file handles whenever concatenation or encoding raised.
        for c in clips:
            try:
                c.close()
            except Exception:
                pass
def main_process(input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed):
    """End-to-end handler for the Generate button.

    Runs clip generation, stitches the resulting clips into one video,
    and returns (final_video_path, seed_used) for the output widgets.
    """
    clip_paths, used_seed = generate_video_gpu(
        input_files, prompt, height, width, neg_prompt,
        frames, scale, steps, seed, rand_seed,
    )
    return stitch_videos(clip_paths), used_seed
# --- Gradio UI ---
# NOTE(review): the source's indentation was lost in extraction; the Blocks
# nesting below follows the conventional layout implied by the widget order.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Fast Wan 2.2 - Generador de Video")
    with gr.Row():
        with gr.Column(scale=1):
            input_files = gr.File(
                label="Imágenes de Entrada",
                file_count="multiple",
                type="filepath",
                file_types=["image"],
            )
            prompt = gr.Textbox(label="Prompt", value="Cinematic view, realistic lighting, 4k, slow motion", lines=2)
            frames = gr.Slider(
                minimum=MIN_FRAMES_MODEL,
                maximum=MAX_FRAMES_MODEL,
                step=1,
                value=81,
                label="Duración (Frames)",
                info=f"Máximo soportado por el modelo: {MAX_FRAMES_MODEL} frames",
            )
            with gr.Accordion("Configuración Avanzada", open=False):
                neg_prompt = gr.Textbox(label="Prompt Negativo", value="low quality, distortion, text, watermark, blurry, ugly", lines=2)
                seed = gr.Slider(label="Semilla", minimum=0, maximum=MAX_SEED, step=1, value=42)
                rand_seed = gr.Checkbox(label="Semilla Aleatoria", value=True)
                with gr.Row():
                    height = gr.Slider(minimum=256, maximum=1024, step=32, value=832, label="Altura")
                    width = gr.Slider(minimum=256, maximum=1024, step=32, value=832, label="Anchura")
                steps = gr.Slider(minimum=2, maximum=10, step=1, value=4, label="Pasos")
                scale = gr.Slider(minimum=1.0, maximum=8.0, step=0.1, value=5.0, label="Guidance Scale")
            btn_gen = gr.Button("Generar", variant="primary", size="lg")
        with gr.Column(scale=2):
            output_video = gr.Video(label="Resultado Final", autoplay=True)
            output_seed = gr.Number(label="Semilla Usada")
    btn_gen.click(
        fn=main_process,
        inputs=[input_files, prompt, height, width, neg_prompt, frames, scale, steps, seed, rand_seed],
        outputs=[output_video, output_seed],
    )

if __name__ == "__main__":
    demo.queue().launch()