from __future__ import annotations

import io
import math
import random

import streamlit as st
import torch
from diffusers import StableDiffusionInstructPix2PixPipeline
from PIL import Image, ImageOps
|
|
| |
# User-facing guidance rendered in the UI (via st.markdown in main): explains
# how the Text CFG / Image CFG weights trade off edit strength against
# faithfulness to the input image. Runtime string — do not reword casually.
help_text = """
If you're not getting what you want, there may be a few reasons:
1. Is the image not changing enough? Your Image CFG weight may be too high. This value dictates how similar the output should be to the input.
2. Conversely, is the image changing too much, such that the details in the original image aren't preserved? Try:
* Increasing the Image CFG weight, or
* Decreasing the Text CFG weight
3. Try generating results with different random seeds by setting "Randomize Seed".
"""
|
|
| |
# Preset edit instructions offered in the selectbox in main; the user may
# override them with a free-form instruction via the text input.
example_instructions = [
    "Make it a picasso painting",
    "Turn it into an anime.",
    "add dramatic lighting",
    "Convert to black and white",
]
|
|
| |
# Load the editing pipeline once at module import (safety checker disabled by
# the original author; NOTE(review): consider re-enabling for public deploys).
# BUG FIX: the original forced torch.float16 while moving the model to "cpu" —
# most CPU kernels do not support fp16, which errors out (or silently degrades)
# at inference time. Use fp16 only on CUDA and fall back to fp32 on CPU.
model_id = "timbrooks/instruct-pix2pix"
_device = "cuda" if torch.cuda.is_available() else "cpu"
_dtype = torch.float16 if _device == "cuda" else torch.float32
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    model_id, torch_dtype=_dtype, safety_checker=None
).to(_device)
|
|
| |
def main() -> None:
    """Streamlit entry point: upload an image, pick/type an instruction,
    tune sampling parameters, run InstructPix2Pix, and offer the result
    as a downloadable PNG.

    Returns early (with a warning) until an image has been uploaded.
    """
    st.title("InstructPix2Pix Image Editing")
    st.markdown(help_text)

    uploaded_image = st.file_uploader("Upload an Image", type=["png", "jpg", "jpeg"])

    # Guard clause: nothing below makes sense without an input image.
    if uploaded_image is not None:
        input_image = Image.open(uploaded_image).convert("RGB")
        st.image(input_image, caption="Uploaded Image", width=512)
    else:
        st.warning("Please upload an image to proceed.")
        return

    # A non-empty custom instruction overrides the preset selection.
    instruction = st.selectbox("Choose an instruction or type your own", example_instructions)
    custom_instruction = st.text_input("Or type your custom instruction", "")
    if custom_instruction:
        instruction = custom_instruction

    # Sampling controls; the seed field is only honored when the
    # "Randomize Seed" checkbox is off (see generate()).
    steps = st.slider("Steps", min_value=20, max_value=100, value=50, step=1)
    randomize_seed = st.checkbox("Randomize Seed", value=True)
    seed = st.number_input("Seed (Only used if Randomize Seed is disabled)", min_value=0, value=random.randint(0, 10000))

    text_cfg_scale = st.slider("Text CFG", min_value=1.0, max_value=10.0, value=7.5, step=0.1)
    image_cfg_scale = st.slider("Image CFG", min_value=0.5, max_value=2.0, value=1.5, step=0.1)

    if st.button("Generate Edited Image"):
        with st.spinner("Generating the edited image..."):
            result_image = generate(input_image, instruction, steps, randomize_seed, seed, text_cfg_scale, image_cfg_scale)
            st.image(result_image, caption="Edited Image", width=512)

            # BUG FIX: the original passed result_image.tobytes(), which is
            # raw uncompressed pixel data — the downloaded "edited_image.png"
            # was not a valid PNG. Encode to PNG in memory instead.
            png_buffer = io.BytesIO()
            result_image.save(png_buffer, format="PNG")
            st.download_button("Download Image", data=png_buffer.getvalue(), file_name="edited_image.png", mime="image/png")
|
|
| |
def generate(input_image: Image.Image, instruction: str, steps: int, randomize_seed: bool, seed: int, text_cfg_scale: float, image_cfg_scale: float) -> Image.Image:
    """Resize the input to the model's size grid and run InstructPix2Pix.

    Args:
        input_image: RGB source image to edit.
        instruction: Natural-language edit instruction (e.g. "add dramatic lighting").
        steps: Number of diffusion inference steps.
        randomize_seed: When True, `seed` is ignored and a fresh one is drawn.
        seed: RNG seed used only when `randomize_seed` is False.
        text_cfg_scale: Classifier-free guidance weight for the instruction.
        image_cfg_scale: Guidance weight tying the output to the input image.

    Returns:
        The edited image produced by the pipeline.
    """
    if randomize_seed:
        seed = random.randint(0, 100000)

    # Scale so the longest side is at most 512, then snap both sides DOWN to a
    # multiple of 64 (presumably a requirement of the diffusion UNet — the
    # pipeline is only ever called with such sizes here).
    width, height = input_image.size
    factor = 512 / max(width, height)
    # BUG FIX: clamp to >= 64 — for very elongated inputs (e.g. 512x50) the
    # short side rounded down to 0, making ImageOps.fit crash.
    width = max(64, int((width * factor) // 64) * 64)
    height = max(64, int((height * factor) // 64) * 64)
    input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS)

    # torch.manual_seed also seeds the global RNG; kept as-is to preserve the
    # original's reproducibility behavior.
    generator = torch.manual_seed(seed)
    edited_image = pipe(
        instruction, image=input_image,
        guidance_scale=text_cfg_scale, image_guidance_scale=image_cfg_scale,
        num_inference_steps=steps, generator=generator,
    ).images[0]

    return edited_image
|
|
| if __name__ == "__main__": |
| main() |