Stable-Video-Diffusion-Img2Vid

Paused

App Files Files Community

Let's follow the advice of the error message: add `low_cpu_mem_usage=False` and `device_map=None`

#32

by Fabrice-TIERCELIN - opened Jul 9, 2024

base: refs/heads/main

←

from: refs/pr/32

Discussion Files changed

+10

-3

Files changed (1) hide show

app.py +10 -3

app.py CHANGED Viewed

@@ -23,6 +23,11 @@ fps14Pipe = StableVideoDiffusionPipeline.from_pretrained(
 )
 fps14Pipe.to("cuda")
 max_64_bit_int = 2**63 - 1
 def animate(
@@ -101,7 +106,9 @@ def animate_on_gpu(
 ):
     generator = torch.manual_seed(seed)
-    if version == "svdxt":
         return fps25Pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
     else:
         return fps14Pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
@@ -172,13 +179,13 @@ with gr.Blocks() as demo:
       with gr.Column():
           image = gr.Image(label="Upload your image", type="pil")
           with gr.Accordion("Advanced options", open=False):
-              fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be 25/fps", value=6, minimum=5, maximum=30)
               motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
               noise_aug_strength = gr.Slider(label="Noise strength", info="The noise to add", value=0.1, minimum=0, maximum=1, step=0.1)
               decoding_t = gr.Slider(label="Decoding", info="Number of frames decoded at a time; this eats more VRAM; reduce if necessary", value=3, minimum=1, maximum=5, step=1)
               video_format = gr.Radio([["*.mp4", "mp4"], ["*.gif", "gif"]], label="Video format for result", info="File extention", value="mp4", interactive=True)
               frame_format = gr.Radio([["*.webp", "webp"], ["*.png", "png"], ["*.jpeg", "jpeg"], ["*.gif (unanimated)", "gif"], ["*.bmp", "bmp"]], label="Image format for frames", info="File extention", value="webp", interactive=True)
-              version = gr.Radio([["Auto", "auto"], ["🏃🏻‍♀️ SVD (trained on 14 f/s)", "svd"], ["🏃🏻‍♀️💨 SVD-XT (trained on 25 f/s)", "svdxt"]], label="Model", info="Trained model", value="auto", interactive=True)
               seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
               randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

 )
 fps14Pipe.to("cuda")
+dragnuwaPipe = StableVideoDiffusionPipeline.from_pretrained(
+    "a-r-r-o-w/dragnuwa-svd", torch_dtype=torch.float16, variant="fp16", low_cpu_mem_usage=False, device_map=None
+)
+dragnuwaPipe.to("cuda")
 max_64_bit_int = 2**63 - 1
 def animate(
 ):
     generator = torch.manual_seed(seed)
+    if version == "dragnuwa":
+        return dragnuwaPipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
+    elif version == "svdxt":
         return fps25Pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
     else:
         return fps14Pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
       with gr.Column():
           image = gr.Image(label="Upload your image", type="pil")
           with gr.Accordion("Advanced options", open=False):
+              fps_id = gr.Slider(label="Frames per second", info="The length of your video in seconds will be 25/fps", value=25, minimum=5, maximum=30)
               motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
               noise_aug_strength = gr.Slider(label="Noise strength", info="The noise to add", value=0.1, minimum=0, maximum=1, step=0.1)
               decoding_t = gr.Slider(label="Decoding", info="Number of frames decoded at a time; this eats more VRAM; reduce if necessary", value=3, minimum=1, maximum=5, step=1)
               video_format = gr.Radio([["*.mp4", "mp4"], ["*.gif", "gif"]], label="Video format for result", info="File extention", value="mp4", interactive=True)
               frame_format = gr.Radio([["*.webp", "webp"], ["*.png", "png"], ["*.jpeg", "jpeg"], ["*.gif (unanimated)", "gif"], ["*.bmp", "bmp"]], label="Image format for frames", info="File extention", value="webp", interactive=True)
+              version = gr.Radio([["Auto", "auto"], ["🏃🏻‍♀️ SVD (trained on 14 f/s)", "svd"], ["🏃🏻‍♀️💨 SVD-XT (trained on 25 f/s)", "svdxt"], ["DragNUWA", "dragnuwa"]], label="Model", info="Trained model", value="auto", interactive=True)
               seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)
               randomize_seed = gr.Checkbox(label="Randomize seed", value=True)