Spaces:

Reaper200
/

test_video_ai

Runtime error

App Files Files Community

Reaper200 commited on Sep 4, 2025

Commit

8a88200

verified ·

1 Parent(s): b6d1ef4

Create app.py

Browse files

Files changed (1) hide show

app.py +145 -131

app.py CHANGED Viewed

@@ -1,154 +1,168 @@
-import gradio as gr
-import numpy as np
-import random
-# import spaces #[uncomment to use ZeroGPU]
-from diffusers import DiffusionPipeline
 import torch
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model_repo_id = "stabilityai/sdxl-turbo"  # Replace to the model you would like to use
-if torch.cuda.is_available():
-    torch_dtype = torch.float16
-else:
-    torch_dtype = torch.float32
-pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)
-pipe = pipe.to(device)
-MAX_SEED = np.iinfo(np.int32).max
-MAX_IMAGE_SIZE = 1024
-# @spaces.GPU #[uncomment to use ZeroGPU]
-def infer(
-    prompt,
-    negative_prompt,
-    seed,
-    randomize_seed,
-    width,
-    height,
-    guidance_scale,
-    num_inference_steps,
-    progress=gr.Progress(track_tqdm=True),
-):
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator().manual_seed(seed)
-    image = pipe(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        guidance_scale=guidance_scale,
-        num_inference_steps=num_inference_steps,
-        width=width,
-        height=height,
-        generator=generator,
-    ).images[0]
-    return image, seed
-examples = [
-    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
-    "An astronaut riding a green horse",
-    "A delicious ceviche cheesecake slice",
-]
-css = """
-#col-container {
-    margin: 0 auto;
-    max-width: 640px;
-}
-"""
-with gr.Blocks(css=css) as demo:
-    with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # Text-to-Image Gradio Template")
         with gr.Row():
-            prompt = gr.Text(
-                label="Prompt",
-                show_label=False,
-                max_lines=1,
-                placeholder="Enter your prompt",
-                container=False,
-            )
-            run_button = gr.Button("Run", scale=0, variant="primary")
-        result = gr.Image(label="Result", show_label=False)
-        with gr.Accordion("Advanced Settings", open=False):
-            negative_prompt = gr.Text(
-                label="Negative prompt",
-                max_lines=1,
-                placeholder="Enter a negative prompt",
-                visible=False,
-            )
-            seed = gr.Slider(
-                label="Seed",
-                minimum=0,
-                maximum=MAX_SEED,
-                step=1,
-                value=0,
-            )
-            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
-            with gr.Row():
-                width = gr.Slider(
-                    label="Width",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024,  # Replace with defaults that work for your model
-                )
-                height = gr.Slider(
-                    label="Height",
-                    minimum=256,
-                    maximum=MAX_IMAGE_SIZE,
-                    step=32,
-                    value=1024,  # Replace with defaults that work for your model
                 )
-            with gr.Row():
-                guidance_scale = gr.Slider(
-                    label="Guidance scale",
-                    minimum=0.0,
-                    maximum=10.0,
-                    step=0.1,
-                    value=0.0,  # Replace with defaults that work for your model
-                )
-                num_inference_steps = gr.Slider(
-                    label="Number of inference steps",
-                    minimum=1,
-                    maximum=50,
-                    step=1,
-                    value=2,  # Replace with defaults that work for your model
-                )
-        gr.Examples(examples=examples, inputs=[prompt])
-    gr.on(
-        triggers=[run_button.click, prompt.submit],
-        fn=infer,
-        inputs=[
-            prompt,
-            negative_prompt,
-            seed,
-            randomize_seed,
-            width,
-            height,
-            guidance_scale,
-            num_inference_steps,
-        ],
-        outputs=[result, seed],
-    )
-if __name__ == "__main__":
-    demo.launch()

+import os
+import tempfile
+from typing import List
+import gradio as gr
 import torch
+from PIL import Image
+from diffusers import StableVideoDiffusionPipeline
+from diffusers.utils import export_to_video
+MODEL_ID_DEFAULT = os.getenv("MODEL_ID", "stabilityai/stable-video-diffusion-img2vid")
+DTYPE = torch.float16 if torch.cuda.is_available() else torch.float32
+pipe = None
+def load_pipeline(model_id: str = MODEL_ID_DEFAULT):
+    global pipe
+    if pipe is not None:
+        return pipe
+    kwargs = {
+        "torch_dtype": DTYPE,
+    }
+    # fp16 variant helps on GPU spaces
+    if DTYPE == torch.float16:
+        kwargs["variant"] = "fp16"
+    pipe_local = StableVideoDiffusionPipeline.from_pretrained(
+        model_id,
+        **kwargs,
+    )
+    # memory & speed tweaks
+    if torch.cuda.is_available():
+        pipe_local.enable_model_cpu_offload()  # good default for Spaces GPUs
+    else:
+        pipe_local.enable_sequential_cpu_offload()
+    pipe_local.enable_vae_slicing()
+    pipe_local.enable_attention_slicing()
+    pipe = pipe_local
+    return pipe
+def _ensure_rgb(img: Image.Image) -> Image.Image:
+    if img.mode != "RGB":
+        return img.convert("RGB")
+    return img
+def generate(
+    image: Image.Image,
+    num_frames: int = 14,
+    fps: int = 8,
+    motion_bucket_id: int = 127,
+    noise_aug_strength: float = 0.02,
+    seed: int = 0,
+    decode_chunk_size: int = 8,
+    model_id: str = MODEL_ID_DEFAULT,
+):
+    if image is None:
+        raise gr.Error("Please upload an image.")
+    pipe = load_pipeline(model_id)
+    # Determinism
+    generator = torch.Generator(device="cuda" if torch.cuda.is_available() else "cpu")
+    if seed is None or seed < 0:
+        seed = torch.seed() % (2**31)
+    generator = generator.manual_seed(int(seed))
+    image = _ensure_rgb(image)
+    with torch.inference_mode():
+        result = pipe(
+            image=image,
+            num_frames=int(num_frames),
+            fps=fps,
+            motion_bucket_id=int(motion_bucket_id),
+            noise_aug_strength=float(noise_aug_strength),
+            decode_chunk_size=int(decode_chunk_size),
+            generator=generator,
+        )
+    frames: List[Image.Image] = result.frames[0]
+    # Save to a temp .mp4
+    tmpdir = tempfile.mkdtemp()
+    out_path = os.path.join(tmpdir, "output.mp4")
+    export_to_video(frames, out_path, fps=fps)
+    return out_path
+def build_demo():
+    with gr.Blocks(theme=gr.themes.Soft(), fill_width=True) as demo:
+        gr.Markdown(
+            """
+            # Image → Video (Stable Video Diffusion)
+            Pretrained **Stable Video Diffusion (Img2Vid)** from the Hugging Face Hub.
+            - Default model: `stabilityai/stable-video-diffusion-img2vid`
+            - Try alternative ids like `stabilityai/stable-video-diffusion-img2vid-xt`
+            """
+        )
         with gr.Row():
+            with gr.Column(scale=1):
+                inp_img = gr.Image(type="pil", label="Input image", width=512)
+                model_id = gr.Textbox(
+                    value=MODEL_ID_DEFAULT,
+                    label="Model repo id",
+                    info="Any compatible Img2Vid pipeline on the Hub",
                 )
+                with gr.Accordion("Advanced", open=False):
+                    num_frames = gr.Slider(8, 25, value=14, step=1, label="Frames")
+                    fps = gr.Slider(4, 30, value=8, step=1, label="FPS")
+                    motion_bucket_id = gr.Slider(1, 255, value=127, step=1, label="Motion bucket id")
+                    noise_aug_strength = gr.Slider(0.0, 0.5, value=0.02, step=0.01, label="Noise aug strength")
+                    decode_chunk_size = gr.Slider(1, 32, value=8, step=1, label="Decode chunk size")
+                    seed = gr.Number(value=0, precision=0, label="Seed (0 for random)")
+                run = gr.Button("Generate", variant="primary")
+            with gr.Column(scale=1):
+                out_vid = gr.Video(label="Output video (.mp4)")
+        run.click(
+            fn=generate,
+            inputs=[
+                inp_img,
+                num_frames,
+                fps,
+                motion_bucket_id,
+                noise_aug_strength,
+                seed,
+                decode_chunk_size,
+                model_id,
+            ],
+            outputs=[out_vid],
+            queue=True,
+            api_name="predict",
+        )
+        gr.Examples(
+            examples=[
+                ["https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/img2img/sketch-mountains-input.jpg", 14, 8, 127, 0.02, 0, 8, MODEL_ID_DEFAULT],
+            ],
+            inputs=[inp_img, num_frames, fps, motion_bucket_id, noise_aug_strength, seed, decode_chunk_size, model_id],
+            label="Try an example (downloads on-click)",
+        )
+    return demo
+demo = build_demo()
+if __name__ == "__main__":
+    demo.queue(concurrency_count=1, max_size=8).launch()