import PIL
import requests
import torch
import gradio as gr
import random
from PIL import Image
import os
import time
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
# Load the InstructPix2Pix pipeline from the Diffusers library.
# Half-precision weights are requested (torch.float16, "fp16" revision).
model_id = "timbrooks/instruct-pix2pix"
# NOTE(review): safety_checker=None is passed here, while help_text below tells
# users that a safety checker is in place -- confirm which of the two is intended.
pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16, revision="fp16", safety_checker=None)
# Move the whole pipeline to the CUDA device; this script assumes a GPU is present.
pipe.to("cuda")
# Attention slicing is left disabled; xformers attention is enabled instead.
#pipe.enable_attention_slicing()
pipe.enable_xformers_memory_efficient_attention()
# Switch the UNet to the channels_last memory format.
pipe.unet.to(memory_format=torch.channels_last)
# Markdown notes and usage tips for the demo. Presumably rendered in the UI
# further down the file (not visible in this chunk) -- verify against the layout.
help_text = """
**Note: Please be advised that a safety checker has been implemented in this public space.
Any attempts to generate inappropriate or NSFW images will result in the display of a black screen
as a precautionary measure to protect all users. We appreciate your cooperation in
maintaining a safe and appropriate environment for all members of our community.**
New features and bug-fixes:
1. Chat style interface
2. Now use **'reverse'** as prompt to get back the previous image after an unwanted edit
3. Use **'restart'** as prompt to get back to original image and start over!
4. Now you can load larger image files (~5 mb) as well
Some notes from the official [instruct-pix2pix](https://huggingface.co/spaces/timbrooks/instruct-pix2pix) Space by the authors and from the official [Diffusers docs](https://huggingface.co/docs/diffusers/main/en/api/pipelines/stable_diffusion/pix2pix) -
If you're not getting what you want, there may be a few reasons:
1. Is the image not changing enough? Your guidance_scale may be too low. It should be >1. Higher guidance scale encourages to generate images
that are closely linked to the text `prompt`, usually at the expense of lower image quality. This value dictates how similar the output should
be to the input. This pipeline requires a value of at least `1`. It's possible your edit requires larger changes from the original image.
2. Alternatively, you can toggle image_guidance_scale. Image guidance scale is to push the generated image towards the inital image. Image guidance
scale is enabled by setting `image_guidance_scale > 1`. Higher image guidance scale encourages to generate images that are closely
linked to the source image `image`, usually at the expense of lower image quality.
3. I have observed that rephrasing the instruction sometimes improves results (e.g., "turn him into a dog" vs. "make him a dog" vs. "as a dog").
4. Increasing the number of steps sometimes improves results.
5. Do faces look weird? The Stable Diffusion autoencoder has a hard time with faces that are small in the image. Try:
* Cropping the image so the face takes up a larger portion of the frame.
"""
def previous(image):
    """Hand back ``image`` untouched (identity pass-through)."""
    unchanged = image
    return unchanged
def upload_image(file):
    """Open ``file`` with PIL's ``Image.open`` and return the resulting image."""
    opened = Image.open(file)
    return opened
def upload_button_config():
    """Build a Gradio update that sets ``visible=False`` on its output component."""
    hide = gr.update(visible=False)
    return hide
def upload_textbox_config(text_in):
    """Build a Gradio update that sets ``visible=True`` on its output component.

    ``text_in`` is accepted so the function can be wired to an input, but its
    value is not used.
    """
    show = gr.update(visible=True)
    return show
def dummy_fn():
    """Return the fixed placeholder string ``'dummy'``."""
    placeholder = 'dummy'
    return placeholder
def _resize_to_width(image, basewidth=512):
    """Return ``image`` scaled to ``basewidth`` pixels wide, keeping its aspect ratio."""
    wpercent = basewidth / float(image.size[0])
    # Clamp to 1 so an extremely wide input cannot produce a zero-height resize.
    hsize = max(1, int(float(image.size[1]) * wpercent))
    return image.resize((basewidth, hsize), Image.Resampling.LANCZOS)


def _stamped_png_name(img_name, keep_last=None):
    """Derive a new ``.png`` name: ``img_name`` minus its last 4 chars plus a time stamp.

    When ``keep_last`` is given, only that many trailing digits of the
    epoch-seconds stamp are used.
    """
    stamp = str(int(time.time()))
    if keep_last is not None:
        stamp = stamp[-keep_last:]
    return img_name[:-4] + stamp + '.png'


def chat(btn_upload, image_in, in_steps, in_guidance_scale, in_img_guidance_scale, image_hid, img_name, counter_out, image_oneup, prompt, history, progress=gr.Progress(track_tqdm=True)):
    """Handle one chat turn: load the upload, run an edit, or undo/restart.

    The turn is selected from ``prompt`` and ``counter_out``:

    * ``prompt`` equal to ``'reverse'`` (case-insensitive, surrounding
      whitespace ignored): re-save ``image_oneup`` and return it.
    * ``prompt`` equal to ``'restart'``: resize ``image_in`` to 512 px wide,
      save it and return it.
    * ``counter_out == 0``: open ``btn_upload`` with PIL, resize it to 512 px
      wide and save it under a random ``./edited_image_<n>.png`` name. No
      model inference happens on this turn.
    * otherwise: run the InstructPix2Pix pipeline with ``prompt`` on
      ``image_in`` (when ``counter_out == 1``) or on ``image_hid`` (later
      turns), delete the file named by ``img_name`` and save the result under
      a new time-stamped name.

    Args:
        btn_upload: Uploaded file (path or file object) accepted by ``Image.open``.
        image_in: PIL image used as the edit source on the first edit and on 'restart'.
        in_steps: Number of inference steps (converted with ``int``).
        in_guidance_scale: Text guidance scale (converted with ``float``).
        in_img_guidance_scale: Image guidance scale (converted with ``float``).
        image_hid: PIL image used as the edit source from the second edit onwards.
        img_name: File name of the most recently saved image; expected to end
            in a 4-character extension such as ``.png``.
        counter_out: Turn counter; incremented on upload and edit turns.
        image_oneup: PIL image returned by 'reverse'.
        prompt: User message / edit instruction.
        history: List of ``(prompt, response)`` tuples, or a falsy value for a
            fresh conversation.
        progress: Gradio progress tracker.

    Returns:
        A 5-tuple ``(history, history, image, image_file_name, counter_out)``.
        The history is returned twice -- presumably once for display and once
        for state; the output wiring is not visible here, confirm against the
        event binding.
    """
    progress(0, desc="Starting...")
    history = history or []
    # Normalise once so a None or whitespace-padded prompt cannot crash or miss a command.
    command = (prompt or '').strip().lower()

    if command == 'reverse':
        temp_img_name = _stamped_png_name(img_name)
        image_oneup.save(temp_img_name)
        response = 'Reverted to the last image '
        history.append((prompt, response))
        return history, history, image_oneup, temp_img_name, counter_out

    if command == 'restart':
        temp_img_name = _stamped_png_name(img_name)
        image_in = _resize_to_width(image_in)
        image_in.save(temp_img_name)
        # NOTE(review): in the reviewed source this and the other reply literals
        # below were split across two lines (an unterminated string). They are
        # restored as a newline. The saved file name is never referenced in the
        # reply, so markup embedding the image was presumably lost -- confirm
        # against the original and restore it if so.
        response = 'Restarted from the original image ' + '\n'
        history.append((prompt, response))
        return history, history, image_in, temp_img_name, counter_out

    # Canned acknowledgements prepended to the reply after each edit.
    add_text_list = ["There you go", "Enjoy your image!", "Nice work! Wonder what you gonna do next!", "Way to go!", "Does this work for you?", "Something like this?"]

    if counter_out == 0:
        # First turn: only ingest the uploaded file.
        t1 = time.time()
        print(f"Time at start = {t1}")
        seed = random.randint(0, 1000000)
        img_name = f"./edited_image_{seed}.png"
        image_in = _resize_to_width(Image.open(btn_upload))
        image_in.save(img_name)
        response = '\n'  # see NOTE(review) above
        history.append((prompt, response))
        counter_out += 1
        t2 = time.time()
        print(f"Time at end = {t2}")
        time_diff = t2 - t1
        print(f"Time taken = {time_diff}")
        return history, history, image_in, img_name, counter_out

    # Edit turns: the first edit starts from the uploaded image, later ones
    # from the previously edited image.
    source_image = image_in if counter_out == 1 else image_hid
    edited_image = pipe(
        prompt,
        image=source_image,
        num_inference_steps=int(in_steps),
        guidance_scale=float(in_guidance_scale),
        image_guidance_scale=float(in_img_guidance_scale),
    ).images[0]

    # Drop the previous file; it may already be gone, which is fine.
    try:
        os.remove(img_name)
    except FileNotFoundError:
        pass

    temp_img_name = _stamped_png_name(img_name, keep_last=4)
    edited_image.save(temp_img_name)
    response = random.choice(add_text_list) + '\n'  # see NOTE(review) above
    history.append((prompt, response))
    counter_out += 1
    return history, history, edited_image, temp_img_name, counter_out
#Blocks layout
with gr.Blocks(css="style.css") as demo:
with gr.Column(elem_id="col-container") as main_col:
gr.HTML("""
For faster inference without waiting in the queue, you may duplicate the space and upgrade to GPU in settings
Diffusers implementation of instruct-pix2pix - InstructPix2Pix: Learning to Follow Image Editing Instructions!