import gc
import os
import tempfile
import time

import gradio as gr
import spaces
import torch
from diffusers import WanPipeline
from diffusers.utils import export_to_video
from huggingface_hub import snapshot_download

# ============================================================
# DOWNLOAD model to DISK only at startup (near-zero RAM usage)
# This runs during container startup = NO time limit
# ============================================================
print("📥 Pre-caching model files to disk (no RAM used)...")
start = time.time()
model_path = snapshot_download(
    "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
    token=os.environ.get("HF_TOKEN"),
    allow_patterns=["*.safetensors", "*.json", "*.txt", "*.model"],
    ignore_patterns=["*.bin", "*.onnx", "*.msgpack"],
)
gc.collect()
print(f"✅ Files cached to disk in {time.time()-start:.0f}s")
print(f"📁 Path: {model_path}")

# Pipeline is loaded lazily inside the GPU-decorated function so the
# model only occupies GPU/CPU memory once a request actually arrives.
pipe = None


@spaces.GPU(duration=240)
def generate_video(prompt, negative_prompt, num_frames, height, width,
                   num_inference_steps, guidance_scale):
    """Generate a short video from a text prompt with Wan2.1-T2V-1.3B.

    Parameters come straight from the Gradio sliders/textboxes, so the
    numeric ones may arrive as floats and are coerced to int/float before
    being handed to the pipeline.

    Returns:
        str: path to an MP4 file (exported at 16 fps) for the gr.Video output.
    """
    global pipe
    if pipe is None:
        # First call on this GPU worker: load from the local disk cache
        # that was populated at startup.
        print("📦 Loading to GPU from local disk cache...")
        load_start = time.time()
        pipe = WanPipeline.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
        )
        pipe.to("cuda")
        pipe.vae.enable_tiling()  # tile VAE decode to cap peak VRAM
        gc.collect()
        print(f"✅ On GPU in {time.time()-load_start:.0f}s")

    start = time.time()
    with torch.inference_mode():
        result = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_frames=int(num_frames),
            height=int(height),
            width=int(width),
            num_inference_steps=int(num_inference_steps),
            guidance_scale=float(guidance_scale),
        ).frames[0]
    print(f"✅ Generated in {time.time()-start:.1f}s")

    # NamedTemporaryFile(delete=False) creates the file atomically,
    # avoiding the filename race of the deprecated tempfile.mktemp().
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        output_path = tmp.name
    export_to_video(result, output_path, fps=16)

    # Free transient allocations between requests.
    gc.collect()
    torch.cuda.empty_cache()
    return output_path


with gr.Blocks(title="Shotarch Video Gen", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎬 Shotarch Video Generator\n### Wan2.1-1.3B on ZeroGPU")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", lines=3, placeholder="Describe your video...")
            negative = gr.Textbox(
                label="Negative Prompt",
                lines=2,
                value="Bright tones, overexposed, static, blurred details, worst quality, low quality, ugly, deformed, still picture",
            )
            with gr.Row():
                width = gr.Slider(480, 1280, value=1280, step=16, label="Width")
                height = gr.Slider(320, 720, value=720, step=16, label="Height")
            with gr.Row():
                num_frames = gr.Slider(17, 81, value=81, step=4, label="Frames (81=5sec)")
                steps = gr.Slider(10, 50, value=25, step=1, label="Steps")
            guidance = gr.Slider(1.0, 15.0, value=5.0, step=0.5, label="Guidance Scale")
            btn = gr.Button("🎬 Generate Video", variant="primary")
        with gr.Column():
            output = gr.Video(label="Generated Video")
    btn.click(
        fn=generate_video,
        inputs=[prompt, negative, num_frames, height, width, steps, guidance],
        outputs=output,
    )

demo.launch()