import gc
import os
import tempfile
import time

import gradio as gr
import spaces
import torch
from diffusers import WanPipeline
from diffusers.utils import export_to_video
from huggingface_hub import snapshot_download

# ============================================================
# DOWNLOAD model to DISK only at startup (near-zero RAM usage)
# This runs during container startup = NO time limit
# ============================================================
print("📥 Pre-caching model files to disk (no RAM used)...")
start = time.time()
model_path = snapshot_download(
    "Wan-AI/Wan2.1-T2V-1.3B-Diffusers",
    token=os.environ.get("HF_TOKEN"),
    allow_patterns=["*.safetensors", "*.json", "*.txt", "*.model"],
    ignore_patterns=["*.bin", "*.onnx", "*.msgpack"],
)
gc.collect()
print(f"✅ Files cached to disk in {time.time()-start:.0f}s")
print(f"📁 Path: {model_path}")

# Pipeline is loaded lazily inside the GPU-decorated function so the
# model only occupies GPU/CPU memory once a request actually arrives.
pipe = None


@spaces.GPU(duration=240)
def generate_video(prompt, negative_prompt, num_frames, height, width,
                   num_inference_steps, guidance_scale):
    """Generate a short video from a text prompt with Wan2.1-T2V-1.3B.

    Parameters come straight from the Gradio sliders/textboxes, so the
    numeric ones may arrive as floats and are coerced to int/float before
    being handed to the pipeline.

    Returns:
        str: path to an MP4 file (exported at 16 fps) for the gr.Video output.
    """
    global pipe
    if pipe is None:
        # First call on this GPU worker: load from the local disk cache
        # that was populated at startup.
        print("📦 Loading to GPU from local disk cache...")
        load_start = time.time()
        pipe = WanPipeline.from_pretrained(
            model_path,
            torch_dtype=torch.float16,
            low_cpu_mem_usage=True,
        )
        pipe.to("cuda")
        pipe.vae.enable_tiling()  # tile VAE decode to cap peak VRAM
        gc.collect()
        print(f"✅ On GPU in {time.time()-load_start:.0f}s")

    start = time.time()
    with torch.inference_mode():
        result = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            num_frames=int(num_frames),
            height=int(height),
            width=int(width),
            num_inference_steps=int(num_inference_steps),
            guidance_scale=float(guidance_scale),
        ).frames[0]
    print(f"✅ Generated in {time.time()-start:.1f}s")

    # NamedTemporaryFile(delete=False) creates the file atomically,
    # avoiding the filename race of the deprecated tempfile.mktemp().
    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
        output_path = tmp.name
    export_to_video(result, output_path, fps=16)

    # Free transient allocations between requests.
    gc.collect()
    torch.cuda.empty_cache()
    return output_path


with gr.Blocks(title="Shotarch Video Gen", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎬 Shotarch Video Generator\n### Wan2.1-1.3B on ZeroGPU")
    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(label="Prompt", lines=3, placeholder="Describe your video...")
            negative = gr.Textbox(
                label="Negative Prompt",
                lines=2,
                value="Bright tones, overexposed, static, blurred details, worst quality, low quality, ugly, deformed, still picture",
            )
            with gr.Row():
                width = gr.Slider(480, 1280, value=1280, step=16, label="Width")
                height = gr.Slider(320, 720, value=720, step=16, label="Height")
            with gr.Row():
                num_frames = gr.Slider(17, 81, value=81, step=4, label="Frames (81=5sec)")
                steps = gr.Slider(10, 50, value=25, step=1, label="Steps")
            guidance = gr.Slider(1.0, 15.0, value=5.0, step=0.5, label="Guidance Scale")
            btn = gr.Button("🎬 Generate Video", variant="primary")
        with gr.Column():
            output = gr.Video(label="Generated Video")
    btn.click(
        fn=generate_video,
        inputs=[prompt, negative, num_frames, height, width, steps, guidance],
        outputs=output,
    )

demo.launch()