Spaces:

kadirnar
/

Video-Diffusion-WebUI

Runtime error

App Files Files Community

kadirnar committed on Mar 10, 2023

Commit

5784791

1 Parent(s): a8cbc39

Upload 3 files

Browse files

Files changed (3) hide show

app.py +27 -82
inpaint_zoom/zoom_out_app.py +154 -0
inpaint_zoom/zoom_out_utils.py +45 -0

app.py CHANGED Viewed

@@ -1,85 +1,30 @@
-from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
-from utils import write_video, dummy, preprocess_image, preprocess_mask_image
-from PIL import Image
-import gradio as gr
-import torch
-import os
-os.environ["CUDA_VISIBLE_DEVICES"]="0"
-orig_prompt = "Ancient underground architectural ruins of Hong Kong in a flooded apocalypse landscape of dead skyscrapers"
-orig_negative_prompt = "lurry, bad art, blurred, text, watermark"
-model_list = ["stabilityai/stable-diffusion-2-inpainting", "runwayml/stable-diffusion-inpainting"]
-def stable_diffusion_zoom_out(
-  repo_id,
-  original_prompt,
-  negative_prompt,
-  step_size,
-  num_frames,
-  fps,
-  num_inference_steps
-    ):
-    pipe = DiffusionPipeline.from_pretrained(repo_id, torch_dtype=torch.float16)
-    pipe.set_use_memory_efficient_attention_xformers(True)
-    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
-    pipe = pipe.to("cuda")
-    pipe.safety_checker = dummy
-    new_image = Image.new(mode="RGBA", size=(512,512))
-    current_image, mask_image = preprocess_mask_image(new_image)
-    current_image = pipe(prompt=[original_prompt], negative_prompt=[negative_prompt], image=current_image, mask_image=mask_image, num_inference_steps=num_inference_steps).images[0]
-    all_frames = []
-    all_frames.append(current_image)
-    for i in range(num_frames):
-        prev_image = preprocess_image(current_image, step_size, 512)
-        current_image = prev_image
-        current_image, mask_image = preprocess_mask_image(current_image)
-        current_image = pipe(prompt=[original_prompt], negative_prompt=[negative_prompt], image=current_image, mask_image=mask_image, num_inference_steps=num_inference_steps).images[0]
-        current_image.paste(prev_image, mask=prev_image)
-        all_frames.append(current_image)
-    save_path = "output.mp4"
-    write_video(save_path, all_frames, fps=fps)
-    return save_path
-inputs = [
-  gr.Dropdown(model_list, value=model_list[0], label="Model"),
-  gr.inputs.Textbox(lines=5, default=orig_prompt, label="Prompt"),
-  gr.inputs.Textbox(lines=1, default=orig_negative_prompt, label="Negative Prompt"),
-  gr.inputs.Slider(minimum=1, maximum=120, default=25, step=5, label="Steps"),
-  gr.inputs.Slider(minimum=1, maximum=100, default=10, step=1, label="Frames"),
-  gr.inputs.Slider(minimum=1, maximum=100, default=16, step=1, label="FPS"),
-  gr.inputs.Slider(minimum=1, maximum=100, default=15, step=1, label="Inference Steps")
-]
-output = gr.outputs.Video()
-examples = [
-  ["stabilityai/stable-diffusion-2-inpainting", orig_prompt, orig_negative_prompt, 25, 10, 16, 15],
-]
-title = "Stable Diffusion Infinite Zoom Out"
-description = """<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
-<br/>
-<a href="https://huggingface.co/spaces/kadirnar/stable-diffusion-2-infinite-zoom-out?duplicate=true">
-<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
-<p/>"""
-demo_app = gr.Interface(
-    fn=stable_diffusion_zoom_out,
-    description=description,
-    inputs=inputs,
-    outputs=output,
-    title=title,
-    theme='huggingface',
-    examples=examples,
-    cache_examples=True
-)
-demo_app.launch(debug=True, enable_queue=True)

+from inpaint_zoom.zoom_out_app import stable_diffusion_text2img_app
+import gradio as gr
+# Page title markup shown at the top of the demo.
+_TITLE_HTML = """
+        <h1 style='text-align: center'>
+       Stable Diffusion Infinite Zoom Out
+        </h1>
+        """
+# Author links shown under the title.
+_FOLLOW_MARKDOWN = """
+        <h4 style='text-align: center'>
+        Follow me for more!
+        <a href='https://twitter.com/kadirnar_ai' target='_blank'>Twitter</a> | <a href='https://github.com/kadirnar' target='_blank'>Github</a> | <a href='https://www.linkedin.com/in/kadir-nar/' target='_blank'>Linkedin</a>
+        </h4>
+        """
+# Assemble the page: header, links, then one tab per zoom direction.
+with gr.Blocks() as app:
+    gr.HTML(_TITLE_HTML)
+    gr.Markdown(_FOLLOW_MARKDOWN)
+    with gr.Row(), gr.Column():
+        with gr.Tab('Zoom Out'):
+            # The tab body is built by the helper imported from inpaint_zoom.zoom_out_app.
+            stable_diffusion_text2img_app()
+        with gr.Tab('Zoom In'):
+            # Placeholder tab: no controls are defined for it yet.
+            pass
+app.launch(debug=True)

inpaint_zoom/zoom_out_app.py ADDED Viewed

	@@ -0,0 +1,154 @@

+from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
+from inpaint_zoom.zoom_out_utils import preprocess_image, preprocess_mask_image, write_video, dummy
+from PIL import Image
+import gradio as gr
+import torch
+import os
+# Expose only GPU 0 to this process.
+os.environ["CUDA_VISIBLE_DEVICES"] = "0"
+# Inpainting checkpoints offered in the model dropdown; the first entry is the default.
+stable_paint_model_list = [
+    "stabilityai/stable-diffusion-2-inpainting",
+    "runwayml/stable-diffusion-inpainting",
+]
+# Example prompts; the first entry pre-fills the prompt textbox.
+stable_paint_prompt_list = [
+    "Ancient underground architectural ruins of Hong Kong in a flooded apocalypse landscape of dead skyscrapers",
+    "A beautiful landscape of a mountain range with a lake in the foreground",
+]
+# Example negative prompts; the first entry pre-fills the negative prompt textbox.
+# "blurry" was previously misspelled "lurry", which is not a meaningful prompt term.
+stable_paint_negative_prompt_list = [
+    "blurry, bad art, blurred, text, watermark",
+]
+def stable_diffusion_zoom_out(
+    model_id,
+    original_prompt,
+    negative_prompt,
+    guidance_scale,
+    num_inference_steps,
+    step_size,
+    num_frames,
+    fps,
+):
+    """
+    Generates a zoom-out video by repeatedly shrinking the last frame and inpainting the new border
+    :param model_id: Checkpoint id passed to DiffusionPipeline.from_pretrained
+    :param original_prompt: Text prompt used for every frame
+    :param negative_prompt: Negative text prompt used for every frame
+    :param guidance_scale: Guidance scale forwarded to the pipeline for every frame
+    :param num_inference_steps: Number of inference steps forwarded to the pipeline per frame
+    :param step_size: Border width in pixels added on each side of the previous frame
+    :param num_frames: Number of frames generated after the initial frame
+    :param fps: Frame rate of the written video
+    :return: Path of the written video file ("output.mp4")
+    """
+    # UI sliders may deliver these as floats; they are used as counts and pixel offsets.
+    num_inference_steps = int(num_inference_steps)
+    step_size = int(step_size)
+    num_frames = int(num_frames)
+    # NOTE(review): the pipeline is rebuilt on every call; consider caching per model_id.
+    pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+    pipe.set_use_memory_efficient_attention_xformers(True)
+    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+    pipe = pipe.to("cuda")
+    # Replace the safety checker with the pass-through helper from zoom_out_utils.
+    pipe.safety_checker = dummy
+    # One shared set of sampling arguments so the first frame and all later frames
+    # use the same settings (the loop previously dropped guidance_scale).
+    generation_kwargs = dict(
+        prompt=[original_prompt],
+        negative_prompt=[negative_prompt],
+        num_inference_steps=num_inference_steps,
+        guidance_scale=guidance_scale,
+    )
+    # Start from an empty RGBA canvas; its alpha is 0, so the derived mask is 255 everywhere.
+    new_image = Image.new(mode="RGBA", size=(512, 512))
+    current_image, mask_image = preprocess_mask_image(new_image)
+    current_image = pipe(image=current_image, mask_image=mask_image, **generation_kwargs).images[0]
+    all_frames = [current_image]
+    for _ in range(num_frames):
+        # Shrink the last frame onto a blank canvas, then inpaint the surrounding border.
+        prev_image = preprocess_image(current_image, step_size, 512)
+        current_image, mask_image = preprocess_mask_image(prev_image)
+        current_image = pipe(image=current_image, mask_image=mask_image, **generation_kwargs).images[0]
+        # Paste the shrunken previous frame back, using its own alpha channel as paste mask.
+        current_image.paste(prev_image, mask=prev_image)
+        all_frames.append(current_image)
+    # NOTE(review): fixed file name, so overlapping requests would write to the same file.
+    save_path = "output.mp4"
+    write_video(save_path, all_frames, fps=fps)
+    return save_path
+def stable_diffusion_text2img_app():
+    """
+    Builds the zoom-out controls and wires the button to stable_diffusion_zoom_out
+    The left column holds the model, prompt and advanced sliders; the right column
+    holds the output. The output is a video component because the callback returns
+    the path of an mp4 file, not an image.
+    """
+    with gr.Blocks():
+        with gr.Row():
+            with gr.Column():
+                text2image_out_model_path = gr.Dropdown(
+                    choices=stable_paint_model_list,
+                    value=stable_paint_model_list[0],
+                    label='Text-Image Model Id'
+                )
+                text2image_out_prompt = gr.Textbox(
+                    lines=1,
+                    value=stable_paint_prompt_list[0],
+                    label='Prompt'
+                )
+                text2image_out_negative_prompt = gr.Textbox(
+                    lines=1,
+                    value=stable_paint_negative_prompt_list[0],
+                    label='Negative Prompt'
+                )
+                with gr.Accordion("Advanced Options", open=False):
+                    text2image_out_guidance_scale = gr.Slider(
+                        minimum=0.1,
+                        maximum=15,
+                        step=0.1,
+                        value=7.5,
+                        label='Guidance Scale'
+                    )
+                    text2image_out_num_inference_step = gr.Slider(
+                        minimum=1,
+                        maximum=100,
+                        step=1,
+                        value=50,
+                        label='Num Inference Step'
+                    )
+                    text2image_out_step_size = gr.Slider(
+                        minimum=1,
+                        maximum=100,
+                        step=1,
+                        value=10,
+                        label='Step Size'
+                    )
+                    text2image_out_num_frames = gr.Slider(
+                        minimum=1,
+                        maximum=100,
+                        step=1,
+                        value=10,
+                        label='Frames'
+                    )
+                    text2image_out_fps = gr.Slider(
+                        minimum=1,
+                        maximum=100,
+                        step=1,
+                        value=30,
+                        label='FPS'
+                    )
+                text2image_out_predict = gr.Button(value='Generator')
+            with gr.Column():
+                # The callback returns a path to "output.mp4", so show it with a video player.
+                output_video = gr.Video(label='Output')
+        # Input order must match the positional parameters of stable_diffusion_zoom_out.
+        text2image_out_predict.click(
+            fn=stable_diffusion_zoom_out,
+            inputs=[
+                text2image_out_model_path,
+                text2image_out_prompt,
+                text2image_out_negative_prompt,
+                text2image_out_guidance_scale,
+                text2image_out_num_inference_step,
+                text2image_out_step_size,
+                text2image_out_num_frames,
+                text2image_out_fps
+            ],
+            outputs=output_video
+        )

inpaint_zoom/zoom_out_utils.py ADDED Viewed

	@@ -0,0 +1,45 @@

+import numpy as np
+import cv2
+from PIL import Image
+def write_video(file_path, frames, fps):
+    """
+    Writes frames to an mp4 video file
+    :param file_path: Path to output video, must end with .mp4
+    :param frames: Non-empty list of PIL.Image objects; the first frame sets the video size
+    :param fps: Desired frame rate
+    :raises ValueError: If frames is empty
+    """
+    if not frames:
+        raise ValueError("write_video needs at least one frame")
+    w, h = frames[0].size
+    fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
+    writer = cv2.VideoWriter(file_path, fourcc, fps, (w, h))
+    try:
+        for frame in frames:
+            # Frames arrive as PIL images; convert to RGB and then to OpenCV's BGR order.
+            np_frame = np.array(frame.convert('RGB'))
+            cv_frame = cv2.cvtColor(np_frame, cv2.COLOR_RGB2BGR)
+            writer.write(cv_frame)
+    finally:
+        # Release the writer even when a frame fails to convert.
+        writer.release()
+def dummy(images, **kwargs):
+    """
+    Pass-through helper that hands the images back untouched
+    :param images: Value returned unchanged as the first tuple element
+    :param kwargs: Accepted and ignored
+    :return: Tuple (images, False)
+    """
+    passthrough = (images, False)
+    return passthrough
+def preprocess_image(current_image, steps, image_size):
+    """
+    Shrinks the frame and centers it on a blank RGBA canvas of the original shape
+    :param current_image: PIL.Image holding the previous frame
+    :param steps: Border width in pixels left free on every side
+    :param image_size: Edge length used to compute the shrunken size and the target window
+    :return: RGBA PIL.Image with the shrunken frame in the center and a border of alpha 1
+    """
+    inner_size = image_size - 2 * steps
+    shrunk = np.array(current_image.resize((inner_size, inner_size)).convert("RGBA"))
+    # Zeroed canvas with the same shape and dtype as the RGBA version of the input.
+    canvas = np.zeros_like(np.array(current_image.convert("RGBA")))
+    # Border pixels keep alpha 1; the center window is overwritten with the shrunken frame.
+    canvas[:, :, 3] = 1
+    canvas[steps:image_size - steps, steps:image_size - steps, :] = shrunk
+    return Image.fromarray(canvas)
+def preprocess_mask_image(current_image):
+    """
+    Splits a frame into an RGB image and a mask built from its inverted alpha channel
+    :param current_image: PIL.Image whose fourth channel is alpha (e.g. mode "RGBA")
+    :return: Tuple (RGB image, RGB mask image) where each mask value is 255 - alpha
+    """
+    pixels = np.array(current_image)
+    # Channel index 3 is read as alpha; the image mode is not checked here.
+    alpha = pixels[:, :, 3]
+    inverted_alpha = 255 - alpha
+    mask_image = Image.fromarray(inverted_alpha).convert("RGB")
+    rgb_image = current_image.convert("RGB")
+    return rgb_image, mask_image