| import gradio as gr |
| import torch |
| from PIL import Image |
|
|
| import gradio as gr |
|
|
# Load an interface for the named model and serve it immediately.
# NOTE(review): this statement runs at import time, ahead of every definition
# further down in this file. If launch() blocks (the usual behaviour when a
# script is run directly), nothing below executes until this server stops —
# confirm that this is intended.
(
    gr.load("models/lambdalabs/sd-image-variations-diffusers")
    .launch()
)
|
|
|
|
def ask(input_im, scale, steps, seed, images):
    """Run the pipeline once on two copies of ``input_im`` and collect the results.

    Args:
        input_im: Image handed to the module-level ``pipe``; it is duplicated
            into a two-element batch.
        scale: Passed to the pipeline as ``guidance_scale``.
        steps: Passed to the pipeline as ``num_inference_steps``.
        seed: Seed for the torch generator; converted with ``int()`` first.
        images: List that receives the results; it is extended in place.

    Returns:
        The same ``images`` list, with one entry appended per item of the
        pipeline's ``"sample"`` output. Items flagged in
        ``"nsfw_content_detected"`` are replaced by the image loaded from
        ``unsafe.png``.
    """
    rng = torch.Generator(device=device).manual_seed(int(seed))

    result = pipe(
        2 * [input_im],
        guidance_scale=scale,
        num_inference_steps=steps,
        generator=rng,
    )

    for idx, candidate in enumerate(result["sample"]):
        flagged = result["nsfw_content_detected"][idx]
        # A flagged output is swapped for the placeholder file instead of
        # being returned to the caller.
        images.append(Image.open(r"unsafe.png") if flagged else candidate)
    return images
|
|
def main(input_im, n_pairs, scale, steps, seed):
    """Collect image variations by calling ``ask`` ``n_pairs`` times.

    A single list is threaded through every call to ``ask`` so that all
    results end up in one flat collection.

    Args:
        input_im: Input image, forwarded unchanged to ``ask``.
        n_pairs: How many times to call ``ask``. Coerced with ``int()``
            because the value comes from a UI slider, which may deliver a
            whole number as a float; ``range()`` rejects floats.
        scale: Guidance scale, forwarded to ``ask``.
        steps: Number of inference steps, forwarded to ``ask``.
        seed: Starting seed; see the note in the loop for how it advances.

    Returns:
        The list of images accumulated over all calls to ``ask``.
    """
    print('Start the magic !')
    n_pairs = int(n_pairs)
    images = []
    for i in range(n_pairs):
        print(f'Asking for a new pair of image [{i + 1}/{n_pairs}]')
        # NOTE: ``seed`` is rebound on every pass, so the offsets accumulate
        # (seed, seed + 1, seed + 3, ...) rather than being seed + i. This is
        # kept as-is so a given starting seed keeps producing the same images.
        seed = seed + i
        images = ask(input_im, scale, steps, seed, images)
    print('Thanks to Sylvain, it worked like a charm!')
    return images
|
|
# Use the GPU when torch reports one is available; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# NOTE(review): StableDiffusionImageEmbedPipeline is not imported anywhere in
# the visible part of this file — confirm where it is meant to come from.
# The weights are requested at one fixed revision hash, then the pipeline is
# moved to the selected device.
pipe = StableDiffusionImageEmbedPipeline.from_pretrained(
    "lambdalabs/sd-image-variations-diffusers",
    revision="273115e88df42350019ef4d628265b8c29ef4af5",
).to(device)
|
|
# Input widgets, in the same order as the parameters of main():
# input image, number of pairs, guidance scale, inference steps, seed.
inputs = [
    gr.Image(),
    gr.Slider(minimum=1, maximum=3, value=2, step=1, label="Pairs of images to ask"),
    gr.Slider(minimum=0, maximum=25, value=3, step=1, label="Guidance scale"),
    gr.Slider(minimum=5, maximum=50, value=25, step=5, label="Steps"),
    # Upper bound is 2**31 - 1; a random starting value is requested.
    gr.Slider(minimum=0, maximum=2**31 - 1, step=1, randomize=True, label="Seed"),
]

# Gallery that receives the list of images returned by main().
output = gr.Gallery(label="Generated variations")
output.style(grid=2, height="")
|
|
# HTML blurb passed as the ``description`` argument of the gr.Interface built
# at the bottom of this file. The bullet separators and the emoji in front of
# "Diffusers" had been stored as mis-encoded characters; they are written here
# as the intended Unicode characters.
description = \
"""
<p style='text-align: center;'>This demo is running on CPU. Working version fixed by Sylvain <a href='https://twitter.com/fffiloni' target='_blank'>@fffiloni</a>. You'll get n pairs of images variations. <br />
Asking for pairs of images instead of more than 2 images in a row helps us to avoid heavy CPU load and connection error out ;)<br />
Waiting time (for 2 pairs): ~5/10 minutes • NSFW filters enabled • <img id='visitor-badge' alt='visitor badge' src='https://visitor-badge.glitch.me/badge?page_id=gradio-blocks.sd-img-variations' style='display: inline-block' /><br />
Generate variations on an input image using a fine-tuned version of Stable Diffusion.<br />
Trained by <a href='https://www.justinpinkney.com' target='_blank'>Justin Pinkney</a> (<a href='https://twitter.com/Buntworthy' target='_blank'>@Buntworthy</a>) at <a href='https://lambdalabs.com/' target='_blank'>Lambda</a><br />
This version has been ported to 🤗 Diffusers library, see more details on how to use this version in the <a href='https://github.com/LambdaLabsML/lambda-diffusers' target='_blank'>Lambda Diffusers repo</a>.<br />
For the original training code see <a href='https://github.com/justinpinkney/stable-diffusion' target='_blank'>this repo</a>.
<img src='https://raw.githubusercontent.com/justinpinkney/stable-diffusion/main/assets/im-vars-thin.jpg' style='display: inline-block;' />
</p>
"""
|
|
# Markdown text passed as the ``article`` argument of the gr.Interface built
# at the bottom of this file.
# NOTE(review): the first line of the text was a single mis-encoded character;
# it is restored here as an em dash, which is the most likely original —
# confirm against the upstream source.
article = \
"""
—
## How does this work?
The normal Stable Diffusion model is trained to be conditioned on text input. This version has had the original text encoder (from CLIP) removed, and replaced with
the CLIP _image_ encoder instead. So instead of generating images based on a text input, images are generated to match CLIP's embedding of the image.
This creates images which have the same rough style and content, but different details, in particular the composition is generally quite different.
This is a totally different approach to the img2img script of the original Stable Diffusion and gives very different results.
The model was fine tuned on the [LAION aesthetics v2 6+ dataset](https://laion.ai/blog/laion-aesthetics/) to accept the new conditioning.
Training was done on 4xA6000 GPUs on [Lambda GPU Cloud](https://lambdalabs.com/service/gpu-cloud).
More details on the method and training will come in a future blog post.
"""
|
|
# Bind main() to the input widgets and the output gallery, attach the title
# and the two text blocks, then start serving the interface.
demo = gr.Interface(
    main,
    inputs,
    output,
    title="Stable Diffusion Image Variations",
    description=description,
    article=article,
)
demo.launch()
|
|