import gradio as gr
import numpy as np
import torch
from diffusers import DDIMScheduler
from pytorch_lightning import seed_everything

from masactrl.diffuser_utils import MasaCtrlPipeline
from masactrl.masactrl_utils import (AttentionBase,
                                     regiter_attention_editor_diffusers)

from .app_utils import global_context

torch.set_grad_enabled(False)
# device = torch.device("cuda") if torch.cuda.is_available() else torch.device(
#     "cpu")
# model_path = "andite/anything-v4.0"
# scheduler = DDIMScheduler(beta_start=0.00085,
#                           beta_end=0.012,
#                           beta_schedule="scaled_linear",
#                           clip_sample=False,
#                           set_alpha_to_one=False)
# model = MasaCtrlPipeline.from_pretrained(model_path,
#                                          scheduler=scheduler).to(device)
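
# A minimal sketch (kept commented out) of how `global_context` from
# `.app_utils` is assumed to be populated by the hosting app, mirroring the
# disabled initialization above; the actual setup lives outside this file:
#
#   global_context["device"] = torch.device(
#       "cuda" if torch.cuda.is_available() else "cpu")
#   global_context["model"] = MasaCtrlPipeline.from_pretrained(
#       model_path, scheduler=scheduler).to(global_context["device"])
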

def consistent_synthesis(source_prompt, target_prompt, starting_step,
                         starting_layer, image_resolution, ddim_steps, scale,
                         seed, appended_prompt, negative_prompt):
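    # NOTE: image_resolution, ddim_steps, and negative_prompt come from the UI
    # but are not forwarded to the pipeline calls below.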
    from masactrl.masactrl import MutualSelfAttentionControl

    model = global_context["model"]
    device = global_context["device"]

    seed_everything(seed)

    with torch.no_grad():
        if appended_prompt is not None:
            source_prompt += appended_prompt
            target_prompt += appended_prompt
        prompts = [source_prompt, target_prompt]

        # initialize the noise map
        start_code = torch.randn([1, 4, 64, 64], device=device)
        start_code = start_code.expand(len(prompts), -1, -1, -1)
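        # Both prompts share the same initial latent, so differences between
        # the two outputs come from the prompts (and from MasaCtrl below)
        # rather than from the starting noise.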

        # inference the synthesized image without MasaCtrl
        editor = AttentionBase()
        regiter_attention_editor_diffusers(model, editor)
        target_image_ori = model([target_prompt],
                                 latents=start_code[-1:],
                                 guidance_scale=scale)
        target_image_ori = target_image_ori.cpu().permute(0, 2, 3, 1).numpy()

        # inference the synthesized image with MasaCtrl
        # hijack the attention module
        controller = MutualSelfAttentionControl(starting_step, starting_layer)
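        # From `starting_step` / `starting_layer` onward, the target branch
        # keeps its own self-attention queries but borrows keys/values from the
        # source branch, which keeps the synthesized content consistent.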
        regiter_attention_editor_diffusers(model, controller)

        # inference the synthesized image
        image_masactrl = model(prompts, latents=start_code, guidance_scale=scale)
        image_masactrl = image_masactrl.cpu().permute(0, 2, 3, 1).numpy()

        return [image_masactrl[0], target_image_ori[0],
                image_masactrl[1]]  # source, fixed seed, masactrl
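
# A hypothetical direct call outside Gradio (kept commented out), assuming
# `global_context` has been populated as sketched above; argument order and
# values mirror the function signature and the UI defaults:
#
#   src_img, fixed_img, masa_img = consistent_synthesis(
#       "1boy, casual, outdoors, sitting", "1boy, casual, outdoors, standing",
#       starting_step=4, starting_layer=10, image_resolution=512,
#       ddim_steps=50, scale=7.5, seed=42, appended_prompt="",
#       negative_prompt="")
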

def create_demo_synthesis():
    with gr.Blocks() as demo:
        gr.Markdown("## **Input Settings**")
        with gr.Row():
            with gr.Column():
                source_prompt = gr.Textbox(
                    label="Source Prompt",
                    value='1boy, casual, outdoors, sitting',
                    interactive=True)
                target_prompt = gr.Textbox(
                    label="Target Prompt",
                    value='1boy, casual, outdoors, standing',
                    interactive=True)
                with gr.Row():
                    ddim_steps = gr.Slider(label="DDIM Steps",
                                           minimum=1,
                                           maximum=999,
                                           value=50,
                                           step=1)
                    starting_step = gr.Slider(
                        label="Step of MasaCtrl",
                        minimum=0,
                        maximum=999,
                        value=4,
                        step=1)
                    starting_layer = gr.Slider(label="Layer of MasaCtrl",
                                               minimum=0,
                                               maximum=16,
                                               value=10,
                                               step=1)
                run_btn = gr.Button("Run")
            with gr.Column():
                appended_prompt = gr.Textbox(label="Appended Prompt", value='')
                negative_prompt = gr.Textbox(label="Negative Prompt", value='')
                with gr.Row():
                    image_resolution = gr.Slider(label="Image Resolution",
                                                 minimum=256,
                                                 maximum=768,
                                                 value=512,
                                                 step=64)
                    scale = gr.Slider(label="CFG Scale",
                                      minimum=0.1,
                                      maximum=30.0,
                                      value=7.5,
                                      step=0.1)
                    seed = gr.Slider(label="Seed",
                                     minimum=-1,
                                     maximum=2147483647,
                                     value=42,
                                     step=1)

        gr.Markdown("## **Output**")
        with gr.Row():
            image_source = gr.Image(label="Source Image")
            image_fixed = gr.Image(label="Image with Fixed Seed")
            image_masactrl = gr.Image(label="Image with MasaCtrl")

        inputs = [
            source_prompt, target_prompt, starting_step, starting_layer,
            image_resolution, ddim_steps, scale, seed, appended_prompt,
            negative_prompt
        ]
        run_btn.click(consistent_synthesis, inputs,
                      [image_source, image_fixed, image_masactrl])
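        # Output order matches the return value of consistent_synthesis:
        # source image, fixed-seed target image, MasaCtrl target image.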
        gr.Examples(
            [[
                "1boy, bishounen, casual, indoors, sitting, coffee shop, bokeh",
                "1boy, bishounen, casual, indoors, standing, coffee shop, bokeh",
                42
            ],
             [
                 "1boy, casual, outdoors, sitting",
                 "1boy, casual, outdoors, sitting, side view", 42
             ],
             [
                 "1boy, casual, outdoors, sitting",
                 "1boy, casual, outdoors, standing, clapping hands", 42
             ],
             [
                 "1boy, casual, outdoors, sitting",
                 "1boy, casual, outdoors, sitting, shows thumbs up", 42
             ],
             [
                 "1boy, casual, outdoors, sitting",
                 "1boy, casual, outdoors, sitting, with crossed arms", 42
             ],
             [
                 "1boy, casual, outdoors, sitting",
                 "1boy, casual, outdoors, sitting, raising hands", 42
             ]],
            [source_prompt, target_prompt, seed],
        )
    return demo

if __name__ == "__main__":
    demo_synthesis = create_demo_synthesis()
    demo_synthesis.launch()