Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,55 +1,83 @@
|
|
| 1 |
-
import huggingface_hub as hf_hub
|
| 2 |
-
# Shim missing APIs removed in huggingface_hub >= 0.26.0
|
| 3 |
-
if not hasattr(hf_hub, "cached_download"):
|
| 4 |
-
hf_hub.cached_download = hf_hub.hf_hub_download
|
| 5 |
-
if not hasattr(hf_hub, "model_info"):
|
| 6 |
-
hf_hub.model_info = hf_hub.get_model_info
|
| 7 |
-
|
| 8 |
import gradio as gr
|
| 9 |
import torch
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
| 14 |
import tempfile
|
| 15 |
-
from diffusers import StableVideoDiffusionPipeline
|
| 16 |
-
from diffusers.utils import export_to_video
|
| 17 |
|
| 18 |
-
#
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
image=first_image,
|
| 30 |
last_image=last_image,
|
| 31 |
prompt=prompt,
|
|
|
|
|
|
|
| 32 |
guidance_scale=guidance,
|
| 33 |
-
num_frames=frames
|
| 34 |
).frames
|
| 35 |
-
# Export to a temporary MP4 file
|
| 36 |
-
mp4_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
|
| 37 |
-
export_to_video(video, mp4_path, fps=15)
|
| 38 |
-
return mp4_path # Gradio will auto-encode to base64 for the API
|
| 39 |
|
| 40 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
demo = gr.Interface(
|
| 42 |
fn=infer,
|
| 43 |
inputs=[
|
| 44 |
-
gr.Image(type="pil", label="Start
|
| 45 |
-
gr.Image(type="pil", label="End
|
| 46 |
-
gr.Textbox(placeholder="Prompt (optional)"),
|
| 47 |
-
gr.Slider(
|
| 48 |
-
gr.Slider(8, 48, 25, step=1, label="Num
|
| 49 |
],
|
| 50 |
-
outputs="
|
| 51 |
-
title="
|
|
|
|
| 52 |
)
|
| 53 |
|
| 54 |
-
|
| 55 |
-
demo.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
+
import numpy as np
|
| 4 |
+
from diffusers import AutoencoderKLWan, WanImageToVideoPipeline
|
| 5 |
+
from diffusers.utils import export_to_video, load_image
|
| 6 |
+
from transformers import CLIPVisionModel
|
| 7 |
+
from PIL import Image
|
| 8 |
import tempfile
|
|
|
|
|
|
|
| 9 |
|
| 10 |
+
# --- Load Model ---
|
| 11 |
+
model_id = "Wan-AI/Wan2.1-FLF2V-14B-720P-Diffusers"
|
| 12 |
+
|
| 13 |
+
image_encoder = CLIPVisionModel.from_pretrained(model_id, subfolder="image_encoder", torch_dtype=torch.float32)
|
| 14 |
+
vae = AutoencoderKLWan.from_pretrained(model_id, subfolder="vae", torch_dtype=torch.float32)
|
| 15 |
+
pipe = WanImageToVideoPipeline.from_pretrained(
|
| 16 |
+
model_id, vae=vae, image_encoder=image_encoder, torch_dtype=torch.float16
|
| 17 |
+
)
|
| 18 |
+
pipe.to("cuda" if torch.cuda.is_available() else "cpu")
|
| 19 |
+
|
| 20 |
+
# --- Helper Functions ---
|
| 21 |
+
def aspect_ratio_resize(image, pipe, max_area=720 * 1280):
|
| 22 |
+
aspect_ratio = image.height / image.width
|
| 23 |
+
mod_value = pipe.vae_scale_factor_spatial * pipe.transformer.config.patch_size
|
| 24 |
+
height = round(np.sqrt(max_area * aspect_ratio)) // mod_value * mod_value
|
| 25 |
+
width = round(np.sqrt(max_area / aspect_ratio)) // mod_value * mod_value
|
| 26 |
+
image = image.resize((width, height))
|
| 27 |
+
return image, height, width
|
| 28 |
|
| 29 |
+
def center_crop_resize(image, height, width):
|
| 30 |
+
import torchvision.transforms.functional as TF
|
| 31 |
+
resize_ratio = max(width / image.width, height / image.height)
|
| 32 |
+
width = round(image.width * resize_ratio)
|
| 33 |
+
height = round(image.height * resize_ratio)
|
| 34 |
+
size = [width, height]
|
| 35 |
+
image = TF.center_crop(image, size)
|
| 36 |
+
return image, height, width
|
| 37 |
|
| 38 |
+
# --- Gradio Inference Function ---
|
| 39 |
+
def infer(first_image, last_image, prompt, guidance=5.5, frames=25):
|
| 40 |
+
# Convert to PIL
|
| 41 |
+
if not isinstance(first_image, Image.Image):
|
| 42 |
+
first_image = Image.fromarray(first_image)
|
| 43 |
+
if not isinstance(last_image, Image.Image):
|
| 44 |
+
last_image = Image.fromarray(last_image)
|
| 45 |
+
|
| 46 |
+
# Resize/crop as needed
|
| 47 |
+
first_image, height, width = aspect_ratio_resize(first_image, pipe)
|
| 48 |
+
if last_image.size != first_image.size:
|
| 49 |
+
last_image, _, _ = center_crop_resize(last_image, height, width)
|
| 50 |
+
|
| 51 |
+
# Run pipeline
|
| 52 |
+
output = pipe(
|
| 53 |
image=first_image,
|
| 54 |
last_image=last_image,
|
| 55 |
prompt=prompt,
|
| 56 |
+
height=height,
|
| 57 |
+
width=width,
|
| 58 |
guidance_scale=guidance,
|
| 59 |
+
num_frames=frames,
|
| 60 |
).frames
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
+
# Export to video
|
| 63 |
+
with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
|
| 64 |
+
export_to_video(output, tmp.name, fps=16)
|
| 65 |
+
return tmp.name
|
| 66 |
+
|
| 67 |
+
# --- Gradio Interface ---
|
| 68 |
demo = gr.Interface(
|
| 69 |
fn=infer,
|
| 70 |
inputs=[
|
| 71 |
+
gr.Image(type="pil", label="Start Frame"),
|
| 72 |
+
gr.Image(type="pil", label="End Frame"),
|
| 73 |
+
gr.Textbox(placeholder="Prompt (optional)", label="Prompt"),
|
| 74 |
+
gr.Slider(3, 12, value=5.5, step=0.1, label="Guidance Scale"),
|
| 75 |
+
gr.Slider(8, 48, value=25, step=1, label="Num Frames"),
|
| 76 |
],
|
| 77 |
+
outputs=gr.Video(label="Generated Video"),
|
| 78 |
+
title="WAN Two-Frame Video Interpolation",
|
| 79 |
+
description="Upload two images and (optionally) a prompt to create a smooth video transition."
|
| 80 |
)
|
| 81 |
|
| 82 |
+
if __name__ == "__main__":
|
| 83 |
+
demo.launch(show_api=True)
|