# app.py — InstructPix2Pix image-editing Streamlit app.
from __future__ import annotations

import io
import math
import random

import streamlit as st
import torch
from diffusers import StableDiffusionInstructPix2PixPipeline
from PIL import Image, ImageOps
# Help text shown at the top of the app to guide CFG-slider tuning.
help_text = """
If you're not getting what you want, there may be a few reasons:
1. Is the image not changing enough? Your Image CFG weight may be too high. This value dictates how similar the output should be to the input.
2. Conversely, is the image changing too much, such that the details in the original image aren't preserved? Try:
* Increasing the Image CFG weight, or
* Decreasing the Text CFG weight
3. Try generating results with different random seeds by setting "Randomize Seed".
"""

# Example instructions offered in the instruction selectbox.
example_instructions = [
    "Make it a picasso painting",
    "Turn it into an anime.",
    "add dramatic lighting",
    "Convert to black and white",
]

# Load the model from Hugging Face once at import time.
# BUG FIX: the pipeline was previously loaded with torch_dtype=torch.float16
# while being placed on CPU; half-precision conv/attention ops are unsupported
# (or numerically unstable) on CPU, so use full precision there.
model_id = "timbrooks/instruct-pix2pix"
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    model_id, torch_dtype=torch.float32, safety_checker=None
).to("cpu")
# Main Streamlit App
def main():
st.title("InstructPix2Pix Image Editing")
st.markdown(help_text)
# Upload input image
uploaded_image = st.file_uploader("Upload an Image", type=["png", "jpg", "jpeg"])
if uploaded_image is not None:
input_image = Image.open(uploaded_image).convert("RGB")
st.image(input_image, caption="Uploaded Image", width=512)
else:
st.warning("Please upload an image to proceed.")
return
# Choose or type in instruction for image edit
instruction = st.selectbox("Choose an instruction or type your own", example_instructions)
custom_instruction = st.text_input("Or type your custom instruction", "")
if custom_instruction:
instruction = custom_instruction
# Control parameters for generation
steps = st.slider("Steps", min_value=20, max_value=100, value=50, step=1)
randomize_seed = st.checkbox("Randomize Seed", value=True)
seed = st.number_input("Seed (Only used if Randomize Seed is disabled)", min_value=0, value=random.randint(0, 10000))
text_cfg_scale = st.slider("Text CFG", min_value=1.0, max_value=10.0, value=7.5, step=0.1)
image_cfg_scale = st.slider("Image CFG", min_value=0.5, max_value=2.0, value=1.5, step=0.1)
# Process button
if st.button("Generate Edited Image"):
with st.spinner("Generating the edited image..."):
result_image = generate(input_image, instruction, steps, randomize_seed, seed, text_cfg_scale, image_cfg_scale)
st.image(result_image, caption="Edited Image", width=512)
# Download the edited image
st.download_button("Download Image", data=result_image.tobytes(), file_name="edited_image.png", mime="image/png")
# Generate the edited image
def generate(input_image: Image.Image, instruction: str, steps: int, randomize_seed: bool, seed: int, text_cfg_scale: float, image_cfg_scale: float):
# Handle seed
if randomize_seed:
seed = random.randint(0, 100000)
# Resize the input image to 512x512 (Stable Diffusion requires square images)
width, height = input_image.size
factor = 512 / max(width, height)
width = int((width * factor) // 64) * 64
height = int((height * factor) // 64) * 64
input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS)
# Generate the edited image using the Pix2Pix pipeline
generator = torch.manual_seed(seed)
edited_image = pipe(
instruction, image=input_image,
guidance_scale=text_cfg_scale, image_guidance_scale=image_cfg_scale,
num_inference_steps=steps, generator=generator,
).images[0]
return edited_image
# Script entry point: run the app when executed directly (e.g. `streamlit run app.py`).
if __name__ == "__main__":
    main()