Stable-Video-Diffusion-Img2Vid

Paused

App Files Community

Remove the formats that do not work and parameterize the size

#37

by Fabrice-TIERCELIN - opened Jul 11, 2024

base: refs/heads/main

←

from: refs/pr/37

Discussion Files changed

+13

-7

Files changed (1) hide show

app.py +13 -7

app.py CHANGED Viewed

@@ -41,9 +41,11 @@ def animate(
     video_format: str = "mp4",
     frame_format: str = "webp",
     version: str = "auto",
-    output_folder: str = "outputs",
 ):
     start = time.time()
     if image.mode == "RGBA":
         image = image.convert("RGB")
@@ -63,7 +65,9 @@ def animate(
         fps_id,
         noise_aug_strength,
         decoding_t,
-        version
     )
     os.makedirs(output_folder, exist_ok=True)
@@ -118,16 +122,18 @@ def animate_on_gpu(
     fps_id: int = 6,
     noise_aug_strength: float = 0.1,
     decoding_t: int = 3,
-    version: str = "svdxt"
 ):
     generator = torch.manual_seed(seed)
     if version == "dragnuwa":
-        return dragnuwaPipe(image, width=1024, height=576, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
     elif version == "svdxt":
-        return fps25Pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
     else:
-        return fps14Pipe(image, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
 def resize_image(image, output_size=(1024, 576)):
@@ -199,7 +205,7 @@ with gr.Blocks() as demo:
               motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
               noise_aug_strength = gr.Slider(label="Noise strength", info="The noise to add", value=0.1, minimum=0, maximum=1, step=0.1)
               decoding_t = gr.Slider(label="Decoding", info="Number of frames decoded at a time; this eats more VRAM; reduce if necessary", value=3, minimum=1, maximum=5, step=1)
-              video_format = gr.Radio([["*.mp4", "mp4"], ["*.avi", "avi"], ["*.webm", "webm"], ["*.ogv", "ogv"], ["*.mkv", "mkv"], ["*.mov", "mov"], ["*.flv", "flv"], ["*.wmv", "wmv"], ["*.gif", "gif"]], label="Video format for result", info="File extention", value="mp4", interactive=True)
               frame_format = gr.Radio([["*.webp", "webp"], ["*.png", "png"], ["*.jpeg", "jpeg"], ["*.gif (unanimated)", "gif"], ["*.bmp", "bmp"]], label="Image format for frames", info="File extention", value="webp", interactive=True)
               version = gr.Radio([["Auto", "auto"], ["🏃🏻‍♀️ SVD (trained on 14 f/s)", "svd"], ["🏃🏻‍♀️💨 SVD-XT (trained on 25 f/s)", "svdxt"], ["DragNUWA (unstable)", "dragnuwa"]], label="Model", info="Trained model", value="auto", interactive=True)
               seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)

     video_format: str = "mp4",
     frame_format: str = "webp",
     version: str = "auto",
+    width: int = 1024,
+    height: int = 576
 ):
     start = time.time()
+    output_folder = "outputs"
     if image.mode == "RGBA":
         image = image.convert("RGB")
         fps_id,
         noise_aug_strength,
         decoding_t,
+        version,
+        width,
+        height
     )
     os.makedirs(output_folder, exist_ok=True)
     fps_id: int = 6,
     noise_aug_strength: float = 0.1,
     decoding_t: int = 3,
+    version: str = "svdxt",
+    width: int = 1024,
+    height: int = 576
 ):
     generator = torch.manual_seed(seed)
     if version == "dragnuwa":
+        return dragnuwaPipe(image, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
     elif version == "svdxt":
+        return fps25Pipe(image, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
     else:
+        return fps14Pipe(image, width=width, height=height, decode_chunk_size=decoding_t, generator=generator, motion_bucket_id=motion_bucket_id, noise_aug_strength=noise_aug_strength, num_frames=25).frames[0]
 def resize_image(image, output_size=(1024, 576)):
               motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to add/remove from the image", value=127, minimum=1, maximum=255)
               noise_aug_strength = gr.Slider(label="Noise strength", info="The noise to add", value=0.1, minimum=0, maximum=1, step=0.1)
               decoding_t = gr.Slider(label="Decoding", info="Number of frames decoded at a time; this eats more VRAM; reduce if necessary", value=3, minimum=1, maximum=5, step=1)
+              video_format = gr.Radio([["*.mp4", "mp4"], ["*.avi", "avi"], ["*.wmv", "wmv"], ["*.mkv", "mkv"], ["*.mov", "mov"], ["*.gif", "gif"]], label="Video format for result", info="File extention", value="mp4", interactive=True)
               frame_format = gr.Radio([["*.webp", "webp"], ["*.png", "png"], ["*.jpeg", "jpeg"], ["*.gif (unanimated)", "gif"], ["*.bmp", "bmp"]], label="Image format for frames", info="File extention", value="webp", interactive=True)
               version = gr.Radio([["Auto", "auto"], ["🏃🏻‍♀️ SVD (trained on 14 f/s)", "svd"], ["🏃🏻‍♀️💨 SVD-XT (trained on 25 f/s)", "svdxt"], ["DragNUWA (unstable)", "dragnuwa"]], label="Model", info="Trained model", value="auto", interactive=True)
               seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int, step=1)