Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / cookbook /main /en /stable_diffusion_interpolation.html

rtrm

about 1 month ago

download

raw

69.6 kB

	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Images Interpolation with Stable Diffusion&quot;,&quot;local&quot;:&quot;images-interpolation-with-stable-diffusion&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Model&quot;,&quot;local&quot;:&quot;model&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Generation parameters&quot;,&quot;local&quot;:&quot;generation-parameters&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example 1: Prompt interpolation&quot;,&quot;local&quot;:&quot;example-1-prompt-interpolation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example 2: Diffusion latents interpolation for a single prompt&quot;,&quot;local&quot;:&quot;example-2-diffusion-latents-interpolation-for-a-single-prompt&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example 3: Interpolation between multiple prompts&quot;,&quot;local&quot;:&quot;example-3-interpolation-between-multiple-prompts&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example 4: Circular walk through the diffusion latent space for a single prompt&quot;,&quot;local&quot;:&quot;example-4-circular-walk-through-the-diffusion-latent-space-for-a-single-prompt&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}">
	<link href="/docs/cookbook/main/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
	<link rel="modulepreload" href="/docs/cookbook/main/en/_app/immutable/entry/start.96b44205.js">
	<link rel="modulepreload" href="/docs/cookbook/main/en/_app/immutable/chunks/scheduler.65852ee5.js">
	<link rel="modulepreload" href="/docs/cookbook/main/en/_app/immutable/chunks/singletons.a64a46c3.js">
	<link rel="modulepreload" href="/docs/cookbook/main/en/_app/immutable/chunks/paths.f88132ad.js">
	<link rel="modulepreload" href="/docs/cookbook/main/en/_app/immutable/entry/app.e92a3d99.js">
	<link rel="modulepreload" href="/docs/cookbook/main/en/_app/immutable/chunks/index.aa74147d.js">
	<link rel="modulepreload" href="/docs/cookbook/main/en/_app/immutable/nodes/0.0809e592.js">
	<link rel="modulepreload" href="/docs/cookbook/main/en/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/cookbook/main/en/_app/immutable/nodes/44.4d061b73.js">
	<link rel="modulepreload" href="/docs/cookbook/main/en/_app/immutable/chunks/DocNotebookDropdown.479f4286.js">
	<link rel="modulepreload" href="/docs/cookbook/main/en/_app/immutable/chunks/EditOnGithub.4eda6a96.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Images Interpolation with Stable Diffusion&quot;,&quot;local&quot;:&quot;images-interpolation-with-stable-diffusion&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Model&quot;,&quot;local&quot;:&quot;model&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Generation parameters&quot;,&quot;local&quot;:&quot;generation-parameters&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example 1: Prompt interpolation&quot;,&quot;local&quot;:&quot;example-1-prompt-interpolation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example 2: Diffusion latents interpolation for a single prompt&quot;,&quot;local&quot;:&quot;example-2-diffusion-latents-interpolation-for-a-single-prompt&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example 3: Interpolation between multiple prompts&quot;,&quot;local&quot;:&quot;example-3-interpolation-between-multiple-prompts&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Example 4: Circular walk through the diffusion latent space for a single prompt&quot;,&quot;local&quot;:&quot;example-4-circular-walk-through-the-diffusion-latent-space-for-a-single-prompt&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <a href="https://colab.research.google.com/github/huggingface/cookbook/blob/multiagent_assist_improvements/notebooks/en/stable_diffusion_interpolation.ipynb" target="_blank"><img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"></a> </div> <h2 class="relative group"><a id="images-interpolation-with-stable-diffusion" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#images-interpolation-with-stable-diffusion"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 
88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Images Interpolation with Stable Diffusion</span></h2> <p data-svelte-h="svelte-xo5wq0"><em>Authored by: <a href="https://github.com/AkiRusProd" rel="nofollow">Rustam Akimov</a></em></p> <p data-svelte-h="svelte-13o2aov">This notebook shows how to use Stable Diffusion to interpolate between images. Image interpolation using Stable Diffusion is the process of creating intermediate images that smoothly transition from one given image to another, using a generative model based on diffusion.</p> <p data-svelte-h="svelte-ubo7tx">Here are some various use cases for image interpolation with Stable Diffusion:</p> <ul data-svelte-h="svelte-he0og3"><li>Data Augmentation: Stable Diffusion can augment training data for machine learning models by generating synthetic images that lie between existing data points. This can improve the generalization and robustness of machine learning models, especially in tasks like image generation, classification or object detection.</li> <li>Product Design and Prototyping: Stable Diffusion can aid in product design by generating variations of product designs or prototypes with subtle differences. 
This can be useful for exploring design alternatives, conducting user studies, or visualizing design iterations before committing to physical prototypes.</li> <li>Content Generation for Media Production: In media production, such as film and video editing, Stable Diffusion can be used to generate intermediate frames between key frames, enabling smoother transitions and enhancing visual storytelling. This can save time and resources compared to manual frame-by-frame editing.</li></ul> <p data-svelte-h="svelte-1pybxoa">In the context of image interpolation, Stable Diffusion models are often used to navigate through a high-dimensional latent space. Each dimension represents a specific feature that has been learned by the model. By walking through this latent space and interpolating between different latent representations of images, the model is able to generate a sequence of intermediate images which show a smooth transition between the original images. There are two types of latents in stable diffusion: prompt latents and image latents.</p> <p data-svelte-h="svelte-axm9ar">Latent space walking involves moving through a latent space along a path defined by two or more points (representing images). By carefully selecting these points and the path between them, it is possible to control the features of the generated images, such as style, content, and other visual aspects.</p> <p data-svelte-h="svelte-16porze">In this Notebook, we will explore examples of image interpolation using Stable Diffusion and demonstrate how latent space walking can be implemented and utilized to create smooth transitions between images. 
We’ll provide code snippets and visualizations that illustrate this process in action, allowing for a deeper understanding of how generative models can manipulate and morph image representations in meaningful ways.</p> <p data-svelte-h="svelte-13zokni">First, let’s install all the required modules.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->!pip install -q diffusers transformers xformers accelerate
	!pip install -q numpy scipy ftfy Pillow<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1tw65d0">Import modules</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
	<span class="hljs-keyword">import</span> os

	<span class="hljs-keyword">import</span> time

	<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
	<span class="hljs-keyword">from</span> IPython <span class="hljs-keyword">import</span> display <span class="hljs-keyword">as</span> IPdisplay
	<span class="hljs-keyword">from</span> tqdm.auto <span class="hljs-keyword">import</span> tqdm

	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> (
	DDIMScheduler,
	PNDMScheduler,
	LMSDiscreteScheduler,
	DPMSolverMultistepScheduler,
	EulerAncestralDiscreteScheduler,
	EulerDiscreteScheduler,
	)
	<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> logging

	logging.set_verbosity_error()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1hwxkpg">Let’s check if CUDA is available.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">print</span>(torch.cuda.is_available())

	device = torch.device(<span class="hljs-string">"cuda"</span>) <span class="hljs-keyword">if</span> torch.cuda.is_available() <span class="hljs-keyword">else</span> torch.device(<span class="hljs-string">"cpu"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-157lcra">These settings are used to optimize the performance of PyTorch models on CUDA-enabled GPUs, especially when using mixed precision training or inference, which can be beneficial in terms of speed and memory usage.<br>
	Source: <a href="https://huggingface.co/docs/diffusers/optimization/fp16#memory-efficient-attention" rel="nofollow">https://huggingface.co/docs/diffusers/optimization/fp16#memory-efficient-attention</a></p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->torch.backends.cudnn.benchmark = <span class="hljs-literal">True</span>
	torch.backends.cuda.matmul.allow_tf32 = <span class="hljs-literal">True</span><!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Model</span></h3> <p data-svelte-h="svelte-1xuhc40">The <a href="https://huggingface.co/runwayml/stable-diffusion-v1-5" rel="nofollow"><code>runwayml/stable-diffusion-v1-5</code></a> model and the <a href="https://huggingface.co/docs/diffusers/en/api/schedulers/lms_discrete" rel="nofollow"><code>LMSDiscreteScheduler</code></a> scheduler were chosen to generate images. Despite being an older technology, it continues to enjoy popularity due to its fast performance, minimal memory requirements, and the availability of numerous community fine-tuned models built on top of SD1.5. 
However, you are free to experiment with other models and schedulers to compare the results.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model_name_or_path = <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>

	scheduler = LMSDiscreteScheduler(
	beta_start=<span class="hljs-number">0.00085</span>, beta_end=<span class="hljs-number">0.012</span>, beta_schedule=<span class="hljs-string">"scaled_linear"</span>, num_train_timesteps=<span class="hljs-number">1000</span>
	)


	pipe = StableDiffusionPipeline.from_pretrained(
	model_name_or_path,
	scheduler=scheduler,
	torch_dtype=torch.float32,
	).to(device)

	<span class="hljs-comment"># Disable image generation progress bar, we'll display our own</span>
	pipe.set_progress_bar_config(disable=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ule0bd">These methods are designed to reduce the memory consumed by the GPU. If you have enough VRAM, you can skip this cell.</p> <p data-svelte-h="svelte-1cnpf90">More detailed information can be found here: <a href="https://huggingface.co/docs/diffusers/en/optimization/opt_overview" rel="nofollow">https://huggingface.co/docs/diffusers/en/optimization/opt_overview</a><br>
	In particular, information about the following methods can be found here: <a href="https://huggingface.co/docs/diffusers/optimization/memory" rel="nofollow">https://huggingface.co/docs/diffusers/optimization/memory</a></p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Offloading the weights to the CPU and only loading them on the GPU when they are needed can reduce memory consumption to less than 3GB.</span>
	pipe.enable_model_cpu_offload()

	<span class="hljs-comment"># Tighter ordering of memory tensors.</span>
	pipe.unet.to(memory_format=torch.channels_last)

	<span class="hljs-comment"># Decoding large batches of images with limited VRAM or batches with 32 images or more by decoding the batches of latents one image at a time.</span>
	pipe.enable_vae_slicing()

	<span class="hljs-comment"># Splitting the image into overlapping tiles, decoding the tiles, and then blending the outputs together to compose the final image.</span>
	pipe.enable_vae_tiling()

	<span class="hljs-comment"># Using Flash Attention; If you have PyTorch >= 2.0 installed, you should not expect a speed-up for inference when enabling xformers.</span>
	pipe.enable_xformers_memory_efficient_attention()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1x293fi">The <code>display_images</code> function converts a list of image arrays into a GIF, saves it to a specified path and returns the GIF object for display. It names the GIF file using the current time; if anything fails, it prints the error and returns <code>None</code>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">display_images</span>(<span class="hljs-params">images, save_path</span>):
	    <span class="hljs-string">"""Save `images` as an animated GIF under `save_path` and return it for notebook display.

	    Each element of `images` is indexed with [0] and converted to a uint8 array to form one frame.
	    Returns an IPython display Image for the saved file, or None if building or saving the GIF failed.
	    """</span>
	    <span class="hljs-comment"># Build the output path before the try block so it is defined on every code path.</span>
	    <span class="hljs-comment"># Hyphens (not colons) separate the time fields so the name is valid on file systems that don't allow colons.</span>
	    filename = time.strftime(<span class="hljs-string">"%H-%M-%S"</span>, time.localtime())
	    gif_path = <span class="hljs-string">f"<span class="hljs-subst">{save_path}</span>/<span class="hljs-subst">{filename}</span>.gif"</span>

	    <span class="hljs-keyword">try</span>:
	        <span class="hljs-comment"># Convert each image in the 'images' list from an array to an Image object.</span>
	        frames = [Image.fromarray(np.array(image[<span class="hljs-number">0</span>], dtype=np.uint8)) <span class="hljs-keyword">for</span> image <span class="hljs-keyword">in</span> images]

	        <span class="hljs-comment"># Save the first frame as a GIF file and append the remaining frames to it.</span>
	        <span class="hljs-comment"># The GIF will play each frame for 100 milliseconds and will loop indefinitely.</span>
	        frames[<span class="hljs-number">0</span>].save(
	            gif_path,
	            save_all=<span class="hljs-literal">True</span>,
	            append_images=frames[<span class="hljs-number">1</span>:],
	            duration=<span class="hljs-number">100</span>,
	            loop=<span class="hljs-number">0</span>,
	        )
	    <span class="hljs-keyword">except</span> Exception <span class="hljs-keyword">as</span> e:
	        <span class="hljs-comment"># Print the error and stop here: there is no valid GIF on disk to display.</span>
	        <span class="hljs-built_in">print</span>(e)
	        <span class="hljs-keyword">return</span> <span class="hljs-literal">None</span>

	    <span class="hljs-comment"># Return the saved GIF as an IPython display object so it can be displayed in a notebook.</span>
	    <span class="hljs-keyword">return</span> IPdisplay.Image(gif_path)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="generation-parameters" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#generation-parameters"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Generation parameters</span></h3> <ul data-svelte-h="svelte-avta92"><li><code>seed</code>: This variable is used to set a specific random seed for reproducibility.</li> <li><code>generator</code>: This is set to a PyTorch random number generator object if a seed is provided, otherwise it is None. It ensures that the operations using it have reproducible outcomes.</li> <li><code>guidance_scale</code>: This parameter controls the extent to which the model should follow the prompt in text-to-image generation tasks, with higher values leading to stronger adherence to the prompt.</li> <li><code>num_inference_steps</code>: This specifies the number of steps the model takes to generate an image. 
More steps can lead to a higher quality image but take longer to generate.</li> <li><code>num_interpolation_steps</code>: This determines the number of steps used when interpolating between two points in the latent space, affecting the smoothness of transitions in generated animations.</li> <li><code>height</code>: The height of the generated images in pixels.</li> <li><code>width</code>: The width of the generated images in pixels.</li> <li><code>save_path</code>: The file system path where the generated gifs will be saved.</li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># The seed is set to "None", because we want different results each time we run the generation.</span>
	seed = <span class="hljs-literal">None</span>

	<span class="hljs-keyword">if</span> seed <span class="hljs-keyword">is</span> <span class="hljs-keyword">not</span> <span class="hljs-literal">None</span>:
	generator = torch.manual_seed(seed)
	<span class="hljs-keyword">else</span>:
	generator = <span class="hljs-literal">None</span>

	<span class="hljs-comment"># The guidance scale is set within its typical range (7 - 10).</span>
	guidance_scale = <span class="hljs-number">8</span>

	<span class="hljs-comment"># The number of inference steps was chosen empirically to generate an acceptable picture within an acceptable time.</span>
	num_inference_steps = <span class="hljs-number">15</span>

	<span class="hljs-comment"># The higher you set this value, the smoother the interpolations will be. However, the generation time will increase. This value was chosen empirically.</span>
	num_interpolation_steps = <span class="hljs-number">30</span>

	<span class="hljs-comment"># I would not recommend less than 512 on either dimension. This is because this model was trained on 512x512 image resolution.</span>
	height = <span class="hljs-number">512</span>
	width = <span class="hljs-number">512</span>

	<span class="hljs-comment"># The path where the generated GIFs will be saved</span>
	save_path = <span class="hljs-string">"/output"</span>

	<span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> os.path.exists(save_path):
	os.makedirs(save_path)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="example-1-prompt-interpolation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-1-prompt-interpolation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example 1: Prompt interpolation</span></h3> <p data-svelte-h="svelte-t1vj9q">In this example, interpolating between positive and negative prompt embeddings lets us explore the space between two conceptual points defined by the prompts, potentially producing a variety of images that gradually blend the characteristics dictated by each prompt. In this case, interpolation involves adding scaled deltas to the original embeddings, creating a series of new embeddings that will be used later to generate images with smooth transitions between different states based on the original prompt.</p> <p data-svelte-h="svelte-1x05mg4"><img src="https://huggingface.co/datasets/huggingface/cookbook-images/resolve/main/sd_interpolation_1.gif" alt="Example 1"></p> <p data-svelte-h="svelte-19kzzzg">First of all, we need to tokenize and obtain embeddings for both positive and negative text prompts. 
The positive prompt guides the image generation towards the desired characteristics, while the negative prompt steers it away from unwanted features.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># The text prompt that describes the desired output image.</span>
	prompt = <span class="hljs-string">"Epic shot of Sweden, ultra detailed lake with an ren dear, nostalgic vintage, ultra cozy and inviting, wonderful light atmosphere, fairy, little photorealistic, digital painting, sharp focus, ultra cozy and inviting, wish to be there. very detailed, arty, should rank high on youtube for a dream trip."</span>
	<span class="hljs-comment"># A negative prompt that can be used to steer the generation away from certain features.</span>
	negative_prompt = <span class="hljs-string">"poorly drawn,cartoon, 2d, disfigured, bad art, deformed, poorly drawn, extra limbs, close up, b&w, weird colors, blurry"</span>

	<span class="hljs-comment"># The step size for the interpolation in the prompt embedding space.</span>
	step_size = <span class="hljs-number">0.001</span>

	<span class="hljs-comment"># Tokenizing and encoding the prompt into embeddings.</span>
	prompt_tokens = pipe.tokenizer(
	prompt,
	padding=<span class="hljs-string">"max_length"</span>,
	max_length=pipe.tokenizer.model_max_length,
	truncation=<span class="hljs-literal">True</span>,
	return_tensors=<span class="hljs-string">"pt"</span>,
	)
	prompt_embeds = pipe.text_encoder(prompt_tokens.input_ids.to(device))[<span class="hljs-number">0</span>]


	<span class="hljs-comment"># Tokenizing and encoding the negative prompt into embeddings.</span>
	<span class="hljs-keyword">if</span> negative_prompt <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>:
	negative_prompt = [<span class="hljs-string">""</span>]

	negative_prompt_tokens = pipe.tokenizer(
	negative_prompt,
	padding=<span class="hljs-string">"max_length"</span>,
	max_length=pipe.tokenizer.model_max_length,
	truncation=<span class="hljs-literal">True</span>,
	return_tensors=<span class="hljs-string">"pt"</span>,
	)
	negative_prompt_embeds = pipe.text_encoder(negative_prompt_tokens.input_ids.to(device))[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-szx5hm">Now let’s look at the code part that generates a random initial vector using a normal distribution that is structured to match the dimensions expected by the diffusion model (UNet). This allows for the reproducibility of the results by optionally using a random number generator. After creating the initial vector, the code performs a series of interpolations between the two embeddings (positive and negative prompts), by incrementally adding a small step size for each iteration. The results are stored in a list named “walked_embeddings”.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Generating initial latent 
# vectors from a random normal distribution, with the option to use a generator for reproducibility.</span>
	latents = torch.randn(
	(<span class="hljs-number">1</span>, pipe.unet.config.in_channels, height // <span class="hljs-number">8</span>, width // <span class="hljs-number">8</span>),
	generator=generator,
	)

	walked_embeddings = []

	<span class="hljs-comment"># Interpolating between embeddings for the given number of interpolation steps.</span>
	<span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(num_interpolation_steps):
	walked_embeddings.append([prompt_embeds + step_size * i, negative_prompt_embeds + step_size * i])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1oqz59z">Finally, let’s generate a series of images based on interpolated embeddings and then displaying these images. We’ll iterate over an array of embeddings, using each to generate an image with specified characteristics like height, width, and other parameters relevant to image generation. Then we’ll collect these images into a list. Once generation is complete we’ll call the <code>display_image</code> function to save and display these images as GIF at a given save path.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Generating images using the interpolated embeddings.</span>
	images = []
	<span class="hljs-keyword">for</span> latent <span class="hljs-keyword">in</span> tqdm(walked_embeddings):
	images.append(
	pipe(
	height=height,
	width=width,
	num_images_per_prompt=<span class="hljs-number">1</span>,
	prompt_embeds=latent[<span class="hljs-number">0</span>],
	negative_prompt_embeds=latent[<span class="hljs-number">1</span>],
	num_inference_steps=num_inference_steps,
	guidance_scale=guidance_scale,
	generator=generator,
	latents=latents,
	).images
	)

	<span class="hljs-comment"># Display of saved generated images.</span>
	display_images(images, save_path)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="example-2-diffusion-latents-interpolation-for-a-single-prompt" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-2-diffusion-latents-interpolation-for-a-single-prompt"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example 2: Diffusion latents interpolation for a single prompt</span></h3> <p data-svelte-h="svelte-1y6nb0t">Unlike the first example, in this one, we are performing interpolation between the two embeddings of the diffusion model itself, not the prompts. Please note that in this case, we use the slerp function for interpolation. However, there is nothing stopping us from adding a constant value to one embedding instead.</p> <p data-svelte-h="svelte-zqtywg"><img src="https://huggingface.co/datasets/huggingface/cookbook-images/resolve/main/sd_interpolation_2.gif" alt="Example 2"></p> <p data-svelte-h="svelte-1it4256">The function presented below stands for Spherical Linear Interpolation. It is a method of interpolation on the surface of a sphere. 
This function is commonly used in computer graphics to animate rotations in a smooth manner and can also be used to interpolate between high-dimensional data points in machine learning, such as latent vectors used in generative models.</p> <p data-svelte-h="svelte-1winyti">The source is from Andrej Karpathy’s gist: <a href="https://gist.github.com/karpathy/00103b0037c5aaea32fe1da1af553355" rel="nofollow">https://gist.github.com/karpathy/00103b0037c5aaea32fe1da1af553355</a>.<br>
	A more detailed explanation of this method can be found at: <a href="https://en.wikipedia.org/wiki/Slerp" rel="nofollow">https://en.wikipedia.org/wiki/Slerp</a>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">slerp</span>(<span class="hljs-params">v0, v1, num, t0=<span class="hljs-number">0</span>, t1=<span class="hljs-number">1</span></span>):
	v0 = v0.detach().cpu().numpy()
	v1 = v1.detach().cpu().numpy()

	<span class="hljs-keyword">def</span> <span class="hljs-title function_">interpolation</span>(<span class="hljs-params">t, v0, v1, DOT_THRESHOLD=<span class="hljs-number">0.9995</span></span>):
	<span class="hljs-string">"""helper function to spherically interpolate two arrays v0 and v1"""</span>
	dot = np.<span class="hljs-built_in">sum</span>(v0 * v1 / (np.linalg.norm(v0) * np.linalg.norm(v1)))
	<span class="hljs-keyword">if</span> np.<span class="hljs-built_in">abs</span>(dot) > DOT_THRESHOLD:
	v2 = (<span class="hljs-number">1</span> - t) * v0 + t * v1
	<span class="hljs-keyword">else</span>:
	theta_0 = np.arccos(dot)
	sin_theta_0 = np.sin(theta_0)
	theta_t = theta_0 * t
	sin_theta_t = np.sin(theta_t)
	s0 = np.sin(theta_0 - theta_t) / sin_theta_0
	s1 = sin_theta_t / sin_theta_0
	v2 = s0 * v0 + s1 * v1
	<span class="hljs-keyword">return</span> v2

	t = np.linspace(t0, t1, num)

	v3 = torch.tensor(np.array([interpolation(t[i], v0, v1) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(num)]))

	<span class="hljs-keyword">return</span> v3<!-- HTML_TAG_END --></pre></div> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># The text prompt that describes the desired output image.</span>
	prompt = (
	<span class="hljs-string">"Sci-fi digital painting of an alien landscape with otherworldly plants, strange creatures, and distant planets."</span>
	)
	<span class="hljs-comment"># A negative prompt that can be used to steer the generation away from certain features.</span>
	negative_prompt = <span class="hljs-string">"poorly drawn,cartoon, 3d, disfigured, bad art, deformed, poorly drawn, extra limbs, close up, b&w, weird colors, blurry"</span>

	<span class="hljs-comment"># Generating initial latent vectors from a random normal distribution. In this example two latent vectors are generated, which will serve as start and end points for the interpolation.</span>
	<span class="hljs-comment"># These vectors are shaped to fit the input requirements of the diffusion model's U-Net architecture.</span>
	latents = torch.randn(
	(<span class="hljs-number">2</span>, pipe.unet.config.in_channels, height // <span class="hljs-number">8</span>, width // <span class="hljs-number">8</span>),
	generator=generator,
	)

	<span class="hljs-comment"># Getting our latent embeddings</span>
	interpolated_latents = slerp(latents[<span class="hljs-number">0</span>], latents[<span class="hljs-number">1</span>], num_interpolation_steps)

	<span class="hljs-comment"># Generating images using the interpolated embeddings.</span>
	images = []
	<span class="hljs-keyword">for</span> latent_vector <span class="hljs-keyword">in</span> tqdm(interpolated_latents):
	images.append(
	pipe(
	prompt,
	height=height,
	width=width,
	negative_prompt=negative_prompt,
	num_images_per_prompt=<span class="hljs-number">1</span>,
	num_inference_steps=num_inference_steps,
	guidance_scale=guidance_scale,
	generator=generator,
	latents=latent_vector[<span class="hljs-literal">None</span>, ...],
	).images
	)

	<span class="hljs-comment"># Display of saved generated images.</span>
	display_images(images, save_path)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="example-3-interpolation-between-multiple-prompts" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-3-interpolation-between-multiple-prompts"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example 3: Interpolation between multiple prompts</span></h3> <p data-svelte-h="svelte-15a2nb7">In contrast to the first example, where we moved away from a single prompt, in this example, we will be interpolating between any number of prompts. To do so, we will take consecutive pairs of prompts and create smooth transitions between them. Then, we will combine the interpolations of these consecutive pairs, and instruct the model to generate images based on them. 
For interpolation we will use the slerp function, as in the second example.</p> <p data-svelte-h="svelte-1uv5js0"><img src="https://huggingface.co/datasets/huggingface/cookbook-images/resolve/main/sd_interpolation_3.gif" alt="Example 3"></p> <p data-svelte-h="svelte-u335b5">Once again, let’s tokenize and obtain embeddings but this time for multiple positive and negative text prompts.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Text prompts that describes the desired output image.</span>
	prompts = [
	<span class="hljs-string">"A cute dog in a beautiful field of lavander colorful flowers everywhere, perfect lighting, leica summicron 35mm f2.0, kodak portra 400, film grain"</span>,
	<span class="hljs-string">"A cute cat in a beautiful field of lavander colorful flowers everywhere, perfect lighting, leica summicron 35mm f2.0, kodak portra 400, film grain"</span>,
	]
	<span class="hljs-comment"># Negative prompts that can be used to steer the generation away from certain features.</span>
	negative_prompts = [
	<span class="hljs-string">"poorly drawn,cartoon, 2d, sketch, cartoon, drawing, anime, disfigured, bad art, deformed, poorly drawn, extra limbs, close up, b&w, weird colors, blurry"</span>,
	<span class="hljs-string">"poorly drawn,cartoon, 2d, sketch, cartoon, drawing, anime, disfigured, bad art, deformed, poorly drawn, extra limbs, close up, b&w, weird colors, blurry"</span>,
	]

	<span class="hljs-comment"># <span class="hljs-doctag">NOTE:</span> The number of prompts must match the number of negative prompts</span>

	batch_size = <span class="hljs-built_in">len</span>(prompts)

	<span class="hljs-comment"># Tokenizing and encoding prompts into embeddings.</span>
	prompts_tokens = pipe.tokenizer(
	prompts,
	padding=<span class="hljs-string">"max_length"</span>,
	max_length=pipe.tokenizer.model_max_length,
	truncation=<span class="hljs-literal">True</span>,
	return_tensors=<span class="hljs-string">"pt"</span>,
	)
	prompts_embeds = pipe.text_encoder(prompts_tokens.input_ids.to(device))[<span class="hljs-number">0</span>]

	<span class="hljs-comment"># Tokenizing and encoding negative prompts into embeddings.</span>
	<span class="hljs-keyword">if</span> negative_prompts <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>:
	negative_prompts = [<span class="hljs-string">""</span>] * batch_size

	negative_prompts_tokens = pipe.tokenizer(
	negative_prompts,
	padding=<span class="hljs-string">"max_length"</span>,
	max_length=pipe.tokenizer.model_max_length,
	truncation=<span class="hljs-literal">True</span>,
	return_tensors=<span class="hljs-string">"pt"</span>,
	)
	negative_prompts_embeds = pipe.text_encoder(negative_prompts_tokens.input_ids.to(device))[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1rpvl23">As stated earlier, we will take consecutive pairs of prompts and create smooth transitions between them with <code>slerp</code> function.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Generating initial U-Net latent vectors from a random normal distribution.</span>
	latents = torch.randn(
	(<span class="hljs-number">1</span>, pipe.unet.config.in_channels, height // <span class="hljs-number">8</span>, width // <span class="hljs-number">8</span>),
	generator=generator,
	)

	<span class="hljs-comment"># Interpolating between embeddings pairs for the given number of interpolation steps.</span>
	interpolated_prompt_embeds = []
	interpolated_negative_prompts_embeds = []
	<span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(batch_size - <span class="hljs-number">1</span>):
	interpolated_prompt_embeds.append(slerp(prompts_embeds[i], prompts_embeds[i + <span class="hljs-number">1</span>], num_interpolation_steps))
	interpolated_negative_prompts_embeds.append(
	slerp(
	negative_prompts_embeds[i],
	negative_prompts_embeds[i + <span class="hljs-number">1</span>],
	num_interpolation_steps,
	)
	)

	interpolated_prompt_embeds = torch.cat(interpolated_prompt_embeds, dim=<span class="hljs-number">0</span>).to(device)

	interpolated_negative_prompts_embeds = torch.cat(interpolated_negative_prompts_embeds, dim=<span class="hljs-number">0</span>).to(device)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1sxcfxg">Finally, we need to generate images based on the embeddings.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Generating images using the interpolated embeddings.</span>
	images = []
	<span class="hljs-keyword">for</span> prompt_embeds, negative_prompt_embeds <span class="hljs-keyword">in</span> tqdm(
	<span class="hljs-built_in">zip</span>(interpolated_prompt_embeds, interpolated_negative_prompts_embeds),
	total=<span class="hljs-built_in">len</span>(interpolated_prompt_embeds),
	):
	images.append(
	pipe(
	height=height,
	width=width,
	num_images_per_prompt=<span class="hljs-number">1</span>,
	prompt_embeds=prompt_embeds[<span class="hljs-literal">None</span>, ...],
	negative_prompt_embeds=negative_prompt_embeds[<span class="hljs-literal">None</span>, ...],
	num_inference_steps=num_inference_steps,
	guidance_scale=guidance_scale,
	generator=generator,
	latents=latents,
	).images
	)

	<span class="hljs-comment"># Display of saved generated images.</span>
	display_images(images, save_path)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="example-4-circular-walk-through-the-diffusion-latent-space-for-a-single-prompt" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-4-circular-walk-through-the-diffusion-latent-space-for-a-single-prompt"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example 4: Circular walk through the diffusion latent space for a single prompt</span></h3> <p data-svelte-h="svelte-pjhh7v">This example was taken from: <a href="https://keras.io/examples/generative/random_walks_with_stable_diffusion/" rel="nofollow">https://keras.io/examples/generative/random_walks_with_stable_diffusion/</a></p> <p data-svelte-h="svelte-1v4hr5x">Let’s imagine that we have two noise components, which we’ll call x and y. We start by moving from 0 to 2π and at each step we add the cosine of x and the sine of y to the result. Using this approach, at the end of our movement we end up with the same noise values that we started with. 
This means that vectors end up turning into themselves, ending our movement.</p> <p data-svelte-h="svelte-1h51p8s"><img src="https://huggingface.co/datasets/huggingface/cookbook-images/resolve/main/sd_interpolation_4.gif" alt="Example 4"></p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># The text prompt that describes the desired output image.</span>
	prompt = <span class="hljs-string">"Beautiful sea sunset, warm light, Aivazovsky style"</span>
	<span class="hljs-comment"># A negative prompt that can be used to steer the generation away from certain features</span>
	negative_prompt = <span class="hljs-string">"picture frames"</span>

	<span class="hljs-comment"># Generating initial latent vectors from a random normal distribution to create a loop interpolation between them.</span>
	latents = torch.randn(
	(<span class="hljs-number">2</span>, <span class="hljs-number">1</span>, pipe.unet.config.in_channels, height // <span class="hljs-number">8</span>, width // <span class="hljs-number">8</span>),
	generator=generator,
	)


	<span class="hljs-comment"># Calculation of looped embeddings</span>
	walk_noise_x = latents[<span class="hljs-number">0</span>].to(device)
	walk_noise_y = latents[<span class="hljs-number">1</span>].to(device)

	<span class="hljs-comment"># Walking on a trigonometric circle</span>
	walk_scale_x = torch.cos(torch.linspace(<span class="hljs-number">0</span>, <span class="hljs-number">2</span>, num_interpolation_steps) * np.pi).to(device)
	walk_scale_y = torch.sin(torch.linspace(<span class="hljs-number">0</span>, <span class="hljs-number">2</span>, num_interpolation_steps) * np.pi).to(device)

	<span class="hljs-comment"># Applying interpolation to noise</span>
	noise_x = torch.tensordot(walk_scale_x, walk_noise_x, dims=<span class="hljs-number">0</span>)
	noise_y = torch.tensordot(walk_scale_y, walk_noise_y, dims=<span class="hljs-number">0</span>)

	circular_latents = noise_x + noise_y

	<span class="hljs-comment"># Generating images using the interpolated latents.</span>
	images = []
	<span class="hljs-keyword">for</span> latent_vector <span class="hljs-keyword">in</span> tqdm(circular_latents):
	images.append(
	pipe(
	prompt,
	height=height,
	width=width,
	negative_prompt=negative_prompt,
	num_images_per_prompt=<span class="hljs-number">1</span>,
	num_inference_steps=num_inference_steps,
	guidance_scale=guidance_scale,
	generator=generator,
	latents=latent_vector,
	).images
	)

	<span class="hljs-comment"># Display of saved generated images.</span>
	display_images(images, save_path)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="next-steps" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#next-steps"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Next Steps</span></h2> <p data-svelte-h="svelte-a25mhu">Moving forward, you can explore various parameters such as guidance scale, seed, and number of interpolation steps to observe how they affect the generated images. Additionally, consider trying out different prompts and schedulers to further enhance your results. 
Another valuable step would be to implement linear interpolation (<code>linspace</code>) instead of spherical linear interpolation (<code>slerp</code>) and compare the results to gain deeper insights into the interpolation process.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/cookbook/blob/main/notebooks/en/stable_diffusion_interpolation.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
		// Client start-up snippet for the prebuilt app bundle. It looks like the
		// bootstrap SvelteKit generates; what the imported modules do internally
		// is not visible from this page.

		// Path settings published under a build-specific global name. The name is
		// presumably looked up by the imported bundle, so it must not be renamed.
		__sveltekit_1l2350x = {
			assets: "/docs/cookbook/main/en",
			base: "/docs/cookbook/main/en",
			env: {}
		};

		// Parent element of this <script> tag; handed to kit.start() below.
		// Read it now: document.currentScript is only populated while the script
		// is executing, not inside the asynchronous continuation further down.
		const mountPoint = document.currentScript.parentElement;

		// Payload forwarded as `data`; both entries are null on this page.
		const nodePayload = [null, null];

		(async () => {
			// Request both entry modules at once and wait until both have loaded.
			const [kit, app] = await Promise.all([
				import("/docs/cookbook/main/en/_app/immutable/entry/start.96b44205.js"),
				import("/docs/cookbook/main/en/_app/immutable/entry/app.e92a3d99.js")
			]);

			kit.start(app, mountPoint, {
				node_ids: [0, 44],
				data: nodePayload,
				form: null,
				error: null
			});
		})();
	}
	</script>

Xet Storage Details

Size: 69.6 kB
Xet hash: b9d97e80efcbb2513f1efc72827bb162661df7896f48dbb214f1427ef73c5f95

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.