| import gradio as gr |
| import numpy as np |
| |
| from PIL import Image |
| from utils import Endpoint, get_token |
| from io import BytesIO |
| import requests |
|
|
|
|
# Module-level Endpoint instance; `edict` reads its `url` attribute to build the request URL.
| endpoint = Endpoint() |
|
|
def local_edict(x, source_text, edit_text,
                edit_strength, guidance_scale,
                steps=50, mix_weight=0.93, ):
    """Run an EDICT edit in-process and return the first result as an array.

    Args:
        x: Input image as an array accepted by ``Image.fromarray``.
        source_text: Description of the original image.
        edit_text: Description of the desired edited image.
        edit_strength: Passed to ``EDICT_editing`` as ``init_image_strength``.
        guidance_scale: Passed through to ``EDICT_editing``.
        steps: Passed through to ``EDICT_editing``.
        mix_weight: Passed through to ``EDICT_editing``.

    Returns:
        ``np.array`` of the first element returned by ``EDICT_editing``.

    NOTE(review): ``EDICT_editing`` is not imported in the visible part of
    this file; unless it is provided elsewhere, calling this function raises
    ``NameError``.
    """
    source_image = Image.fromarray(x)
    edited_images = EDICT_editing(
        source_image,
        source_text,
        edit_text,
        steps=steps,
        mix_weight=mix_weight,
        init_image_strength=edit_strength,
        guidance_scale=guidance_scale,
    )
    # Only the first returned image is used.
    first_edit = edited_images[0]
    return np.array(first_edit)
|
|
def encode_image(image):
    """Write *image* as a JPEG (quality 95) into memory and return the buffer.

    Args:
        image: Object exposing a PIL-style ``save(fp, format=..., quality=...)``.

    Returns:
        A ``BytesIO`` holding the encoded bytes, positioned at offset 0 so it
        can be read or uploaded immediately.
    """
    jpeg_stream = BytesIO()
    save_options = {"format": "JPEG", "quality": 95}
    image.save(jpeg_stream, **save_options)
    # Rewind: after saving, the cursor sits at the end of the written data.
    jpeg_stream.seek(0)
    return jpeg_stream
|
|
|
|
|
|
def decode_image(img_obj):
    """Open *img_obj* with PIL and return it converted to RGB mode.

    Args:
        img_obj: Anything ``Image.open`` accepts (here: a ``BytesIO`` holding
            the response body).

    Returns:
        The opened image after ``convert("RGB")``.
    """
    return Image.open(img_obj).convert("RGB")
|
|
def edict(x, source_text, edit_text,
          edit_strength, guidance_scale,
          steps=50, mix_weight=0.93, ):
    """Edit an image by posting it to the remote EDICT service.

    The input array is JPEG-encoded via ``encode_image`` and uploaded as a
    multipart file to ``endpoint.url + "/api/edit"`` together with the two
    prompts and the two strength settings.

    Args:
        x: Input image as an array accepted by ``Image.fromarray``.
        source_text: Description of the original image.
        edit_text: Description of the desired edited image.
        edit_strength: Sent as the ``edit_strength`` form field.
        guidance_scale: Sent as the ``guidance_scale`` form field.
        steps: Accepted for signature compatibility; not sent to the service.
        mix_weight: Accepted for signature compatibility; not sent to the
            service.

    Returns:
        The image in the response body, decoded to RGB, as a numpy array.

    Raises:
        gr.Error: If the service replies with a status code other than 200.
            The message carries the response text so the UI can display it.
    """
    url = endpoint.url + "/api/edit"
    headers = {
        "User-Agent": "EDICT HuggingFace Space",
        "Auth-Token": get_token(),
    }
    data = {
        "source_text": source_text,
        "edit_text": edit_text,
        "edit_strength": edit_strength,
        "guidance_scale": guidance_scale,
    }
    files = {"image": encode_image(Image.fromarray(x))}

    # NOTE(review): no timeout is passed, so a stalled service blocks this
    # call indefinitely; consider adding `timeout=` once the expected
    # server-side latency is known.
    response = requests.post(url, data=data, files=files, headers=headers)

    if response.status_code != 200:
        # This function feeds an image output, so an error string returned
        # here would be treated as image data and the message would never be
        # shown. Raising gr.Error makes the UI display it.
        raise gr.Error("Error: " + response.text)

    return np.array(decode_image(BytesIO(response.content)))
| |
| |
|
|
# Example rows for the demo. Each row is
# [image path, original description, edit description, edit_strength,
#  guidance_scale], matching the leading parameters of `edict`.
examples = [
    ['square_ims/american_gothic.jpg', 'A painting of two people frowning', 'A painting of two people smiling', 0.5, 3],
    ['square_ims/colloseum.jpg', 'An old ruined building', 'A new modern office building', 0.8, 3],
    ['square_ims/scream.jpg', 'A painting of someone screaming', 'A painting of an alien', 0.5, 3],
    ['square_ims/yosemite.jpg', 'Granite forest valley', 'Granite desert valley', 0.8, 3],
    ['square_ims/einstein.jpg', 'Mouth open', 'Mouth closed', 0.8, 3],
    ['square_ims/einstein.jpg', 'A man', 'A man in K.I.S.S. facepaint', 0.8, 3],
]

# Cupcake examples, currently disabled; kept here for reference.
# examples.extend([
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Chinese New Year cupcake', 0.8, 3],
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Union Jack cupcake', 0.8, 3],
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Nigerian flag cupcake', 0.8, 3],
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A Santa Claus cupcake', 0.8, 3],
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'An Easter cupcake', 0.8, 3],
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A hedgehog cupcake', 0.8, 3],
#     ['square_ims/imagenet_cake_2.jpg', 'A cupcake', 'A rose cupcake', 0.8, 3],
# ])

# Pair each of the two dog photos with every target breed (photo-major order).
for dog_i in (1, 2):
    for breed in ('Golden Retriever', 'Chihuahua', 'Dalmatian'):
        examples += [[f'square_ims/imagenet_dog_{dog_i}.jpg', 'A dog', f'A {breed}', 0.8, 3]]
|
|
|
|
# Short markdown blurb passed to the interface as `description`.
description = (
    '**For safety and ethical considerations, we have disabled image uploading from March 21. 2023.\n'
    'Please try examples provided below.**\n'
    'A gradio demo for [EDICT](https://arxiv.org/abs/2211.12446) (CVPR23)'
)

# Longer markdown usage notes passed to the interface as `article`.
article = """

### Prompting Style

As with many text-to-image methods, the prompting style of EDICT can make a big difference. When in doubt, experiment! Some guidance:
* Parallel *Original Description* and *Edit Description* construction as much as possible. Inserting/editing single words often is enough to affect a change while maintaining a lot of the original structure
* Words that will affect the entire setting (e.g. "A photo of " vs. "A painting of") can make a big difference. Playing around with them can help a lot

### Parameters
Both `edit_strength` and `guidance_scale` have similar properties qualitatively: the higher the value the more the image will change. We suggest
* Increasing/decreasing `edit_strength` first, particularly to alter/preserve more of the original structure/content
* Then changing `guidance_scale` to make the change in the edited region more or less pronounced.

Usually we find changing `edit_strength` to be enough, but feel free to play around (and report any interesting results)!

### Misc.

Having difficulty coming up with a caption? Try [BLIP](https://huggingface.co/spaces/Salesforce/BLIP2) to automatically generate one!

As with most StableDiffusion approaches, faces/text are often problematic to render, especially if they're small. Having these in the foreground will help keep them cleaner.

A returned black image means that the [Safety Checker](https://huggingface.co/CompVis/stable-diffusion-safety-checker) triggered on the photo. This happens in odd cases sometimes (it often rejects
the huggingface logo or variations), but we need to keep it in for obvious reasons.
"""
| |
|
|
# Build and launch the demo UI. The image input is read-only and the
# description tells users to try the provided examples, so the `examples`
# rows must be passed to the interface: without them no image can be loaded.
iface = gr.Interface(
    fn=edict,
    inputs=[
        gr.Image(interactive=False),
        gr.Textbox(label="Original Description"),
        gr.Textbox(label="Edit Description"),
        gr.Slider(0.0, 1, value=0.8, step=0.05),  # feeds edict's edit_strength
        gr.Slider(0, 10, value=3, step=0.5),  # feeds edict's guidance_scale
    ],
    outputs="image",
    examples=examples,
    description=description,
    article=article,
)
iface.launch()
|
|