Buckets:

hf-doc-build
/

doc

Files

xet

hf-doc-build/doc / diffusers /v0.22.2 /en /training /instructpix2pix.html

rtrm

about 1 month ago

download

raw

37.4 kB

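	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;InstructPix2Pix&quot;,&quot;local&quot;:&quot;instructpix2pix&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Running locally with PyTorch&quot;,&quot;local&quot;:&quot;running-locally-with-pytorch&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Installing the dependencies&quot;,&quot;local&quot;:&quot;installing-the-dependencies&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Toy example&quot;,&quot;local&quot;:&quot;toy-example&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Training with multiple GPUs&quot;,&quot;local&quot;:&quot;training-with-multiple-gpus&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inference&quot;,&quot;local&quot;:&quot;inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Stable Diffusion XL&quot;,&quot;local&quot;:&quot;stable-diffusion-xl&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">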
	<link href="/docs/diffusers/v0.22.2/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
	<link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/entry/start.73ea8a3d.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/scheduler.182ea377.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/singletons.60172a60.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/index.1f6d62f6.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/paths.49cddc6d.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/entry/app.60438fe3.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/index.abf12888.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/nodes/0.efabe74f.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/nodes/140.df122fa1.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/CodeBlock.57fe6e13.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/Heading.16916d63.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;InstructPix2Pix&quot;,&quot;local&quot;:&quot;instructpix2pix&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Running locally with PyTorch&quot;,&quot;local&quot;:&quot;running-locally-with-pytorch&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Installing the dependencies&quot;,&quot;local&quot;:&quot;installing-the-dependencies&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Toy example&quot;,&quot;local&quot;:&quot;toy-example&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Training with multiple GPUs&quot;,&quot;local&quot;:&quot;training-with-multiple-gpus&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inference&quot;,&quot;local&quot;:&quot;inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Stable Diffusion XL&quot;,&quot;local&quot;:&quot;stable-diffusion-xl&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="instructpix2pix" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#instructpix2pix"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>InstructPix2Pix</span></h1> <p data-svelte-h="svelte-rvcs8b"><a href="https://arxiv.org/abs/2211.09800" rel="nofollow">InstructPix2Pix</a> is a method to 
fine-tune text-conditioned diffusion models such that they can follow an edit instruction for an input image. Models fine-tuned using this method take the following as inputs:</p> <p align="center" data-svelte-h="svelte-boa587"><img src="https://huggingface.co/datasets/diffusers/docs-images/resolve/main/evaluation_diffusion_models/edit-instruction.png" alt="instructpix2pix-inputs" width="600"></p> <p data-svelte-h="svelte-1ue6mnk">The output is an “edited” image that reflects the edit instruction applied on the input image:</p> <p align="center" data-svelte-h="svelte-5k7lrs"><img src="https://huggingface.co/datasets/diffusers/docs-images/resolve/main/output-gs%407-igs%401-steps%4050.png" alt="instructpix2pix-output" width="600"></p> <p data-svelte-h="svelte-ofa1ib">The <code>train_instruct_pix2pix.py</code> script (you can find it <a href="https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/train_instruct_pix2pix.py" rel="nofollow">here</a>) shows how to implement the training procedure and adapt it for Stable Diffusion.</p> <p data-svelte-h="svelte-1yi2f71"><strong><em>Disclaimer: Even though <code>train_instruct_pix2pix.py</code> implements the InstructPix2Pix
	training procedure while being faithful to the <a href="https://github.com/timothybrooks/instruct-pix2pix" rel="nofollow">original implementation</a> we have only tested it on a <a href="https://huggingface.co/datasets/fusing/instructpix2pix-1000-samples" rel="nofollow">small-scale dataset</a>. This can impact the end results. For better results, we recommend longer training runs with a larger dataset. <a href="https://huggingface.co/datasets/timbrooks/instructpix2pix-clip-filtered" rel="nofollow">Here</a> you can find a large dataset for InstructPix2Pix training.</em></strong></p> <h2 class="relative group"><a id="running-locally-with-pytorch" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#running-locally-with-pytorch"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Running locally with PyTorch</span></h2> <h3 class="relative group"><a id="installing-the-dependencies" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#installing-the-dependencies"><span><svg class="" 
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Installing the dependencies</span></h3> <p data-svelte-h="svelte-19lpyev">Before running the scripts, make sure to install the library’s training dependencies:</p> <p data-svelte-h="svelte-hai21j"><strong>Important</strong></p> <p data-svelte-h="svelte-yv8b1i">To make sure you can successfully run the latest versions of the example scripts, we highly recommend <strong>installing from source</strong> and keeping the install up to date as we update the example scripts frequently and install some example-specific requirements. 
To do this, execute the following steps in a new virtual environment:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->git <span class="hljs-built_in">clone</span> https://github.com/huggingface/diffusers
	<span class="hljs-built_in">cd</span> diffusers
	pip install -e .<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-blzon4">Then cd in the example folder</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">cd</span> examples/instruct_pix2pix<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1gehqux">Now run</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install -r requirements.txt<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1apictl">And initialize an <a href="https://github.com/huggingface/accelerate/" rel="nofollow">🤗Accelerate</a> environment with:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" 
style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate config<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1gl04sb">Or for a default accelerate configuration without answering questions about your environment</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate config default<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-wgvbkc">Or if your environment doesn’t support an interactive shell e.g. 
a notebook</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> accelerate.utils <span class="hljs-keyword">import</span> write_basic_config

	write_basic_config()<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="toy-example" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#toy-example"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Toy example</span></h3> <p data-svelte-h="svelte-1450gzw">As mentioned before, we’ll use a <a href="https://huggingface.co/datasets/fusing/instructpix2pix-1000-samples" rel="nofollow">small toy dataset</a> for training. The dataset
	is a smaller version of the <a href="https://huggingface.co/datasets/timbrooks/instructpix2pix-clip-filtered" rel="nofollow">original dataset</a> used in the InstructPix2Pix paper. To use your own dataset, take a look at the <a href="create_dataset">Create a dataset for training</a> guide.</p> <p data-svelte-h="svelte-pub3aq">Specify the <code>MODEL_NAME</code> environment variable (either a Hub model repository id or a path to the directory containing the model weights) and pass it to the <a href="https://huggingface.co/docs/diffusers/en/api/diffusion_pipeline#diffusers.DiffusionPipeline.from_pretrained.pretrained_model_name_or_path" rel="nofollow"><code>pretrained_model_name_or_path</code></a> argument. You’ll also need to specify the dataset name in <code>DATASET_ID</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START 
--><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>
	<span class="hljs-built_in">export</span> DATASET_ID=<span class="hljs-string">"fusing/instructpix2pix-1000-samples"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-lpm3b6">Now, we can launch training. The script saves all the components (<code>feature_extractor</code>, <code>scheduler</code>, <code>text_encoder</code>, <code>unet</code>, etc) in a subfolder in your repository.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch --mixed_precision=<span class="hljs-string">"fp16"</span> train_instruct_pix2pix.py \
	--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \
	--dataset_name=<span class="hljs-variable">$DATASET_ID</span> \
	--enable_xformers_memory_efficient_attention \
	--resolution=256 --random_flip \
	--train_batch_size=4 --gradient_accumulation_steps=4 --gradient_checkpointing \
	--max_train_steps=15000 \
	--checkpointing_steps=5000 --checkpoints_total_limit=1 \
	--learning_rate=5e-05 --max_grad_norm=1 --lr_warmup_steps=0 \
	--conditioning_dropout_prob=0.05 \
	--mixed_precision=fp16 \
	--seed=42 \
	--push_to_hub<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-lh171p">Additionally, we support performing validation inference to monitor training progress
	with Weights and Biases. You can enable this feature with <code>report_to="wandb"</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch --mixed_precision=<span class="hljs-string">"fp16"</span> train_instruct_pix2pix.py \
	--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \
	--dataset_name=<span class="hljs-variable">$DATASET_ID</span> \
	--enable_xformers_memory_efficient_attention \
	--resolution=256 --random_flip \
	--train_batch_size=4 --gradient_accumulation_steps=4 --gradient_checkpointing \
	--max_train_steps=15000 \
	--checkpointing_steps=5000 --checkpoints_total_limit=1 \
	--learning_rate=5e-05 --max_grad_norm=1 --lr_warmup_steps=0 \
	--conditioning_dropout_prob=0.05 \
	--mixed_precision=fp16 \
	--val_image_url=<span class="hljs-string">"https://hf.co/datasets/diffusers/diffusers-images-docs/resolve/main/mountain.png"</span> \
	--validation_prompt=<span class="hljs-string">"make the mountains snowy"</span> \
	--seed=42 \
	--report_to=wandb \
	--push_to_hub<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ngvvxh">We recommend this type of validation as it can be useful for model debugging. Note that you need <code>wandb</code> installed to use this. You can install <code>wandb</code> by running <code>pip install wandb</code>.</p> <p data-svelte-h="svelte-ep8cx1"><a href="https://wandb.ai/sayakpaul/instruct-pix2pix/runs/ctr3kovq" rel="nofollow">Here</a>, you can find an example training run that includes some validation samples and the training hyperparameters.</p> <p data-svelte-h="svelte-fvnmbv"><strong><em>Note: In the original paper, the authors observed that even when the model is trained with an image resolution of 256x256, it generalizes well to bigger resolutions such as 512x512. This is likely because of the larger dataset they used during training.</em></strong></p> <h2 class="relative group"><a id="training-with-multiple-gpus" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-with-multiple-gpus"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training with multiple GPUs</span></h2> <p 
data-svelte-h="svelte-1tkye3l"><code>accelerate</code> allows for seamless multi-GPU training. Follow the instructions <a href="https://huggingface.co/docs/accelerate/basic_tutorials/launch" rel="nofollow">here</a>
	for running distributed training with <code>accelerate</code>. Here is an example command:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch --mixed_precision=<span class="hljs-string">"fp16"</span> --multi_gpu train_instruct_pix2pix.py \
	--pretrained_model_name_or_path=runwayml/stable-diffusion-v1-5 \
	--dataset_name=sayakpaul/instructpix2pix-1000-samples \
	--use_ema \
	--enable_xformers_memory_efficient_attention \
	--resolution=512 --random_flip \
	--train_batch_size=4 --gradient_accumulation_steps=4 --gradient_checkpointing \
	--max_train_steps=15000 \
	--checkpointing_steps=5000 --checkpoints_total_limit=1 \
	--learning_rate=5e-05 --lr_warmup_steps=0 \
	--conditioning_dropout_prob=0.05 \
	--mixed_precision=fp16 \
	--seed=42 \
	--push_to_hub<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference</span></h2> <p data-svelte-h="svelte-109q2no">Once training is complete, we can perform inference:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white 
py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> PIL
	<span class="hljs-keyword">import</span> requests
	<span class="hljs-keyword">import</span> torch
	<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionInstructPix2PixPipeline

	model_id = <span class="hljs-string">"your_model_id"</span> <span class="hljs-comment"># <- replace this</span>
	pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
	    model_id, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>
	).to(<span class="hljs-string">"cuda"</span>)
	generator = torch.Generator(<span class="hljs-string">"cuda"</span>).manual_seed(<span class="hljs-number">0</span>)

	url = <span class="hljs-string">"https://huggingface.co/datasets/sayakpaul/sample-datasets/resolve/main/test_pix2pix_4.png"</span>


	<span class="hljs-keyword">def</span> <span class="hljs-title function_">download_image</span>(<span class="hljs-params">url</span>):
	    image = PIL.Image.<span class="hljs-built_in">open</span>(requests.get(url, stream=<span class="hljs-literal">True</span>).raw)
	    image = PIL.ImageOps.exif_transpose(image)
	    image = image.convert(<span class="hljs-string">"RGB"</span>)
	    <span class="hljs-keyword">return</span> image


	image = download_image(url)
	prompt = <span class="hljs-string">"wipe out the lake"</span>
	num_inference_steps = <span class="hljs-number">20</span>
	image_guidance_scale = <span class="hljs-number">1.5</span>
	guidance_scale = <span class="hljs-number">10</span>

	edited_image = pipe(
	    prompt,
	    image=image,
	    num_inference_steps=num_inference_steps,
	    image_guidance_scale=image_guidance_scale,
	    guidance_scale=guidance_scale,
	    generator=generator,
	).images[<span class="hljs-number">0</span>]
	edited_image.save(<span class="hljs-string">"edited_image.png"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-11ipsjg">An example model repo obtained using this training script can be found
	here - <a href="https://huggingface.co/sayakpaul/instruct-pix2pix" rel="nofollow">sayakpaul/instruct-pix2pix</a>.</p> <p data-svelte-h="svelte-1jap03f">We encourage you to play with the following three parameters to control
	speed and quality during inference:</p> <ul data-svelte-h="svelte-1k7xfoe"><li><code>num_inference_steps</code></li> <li><code>image_guidance_scale</code></li> <li><code>guidance_scale</code></li></ul> <p data-svelte-h="svelte-9uns02">In particular, <code>image_guidance_scale</code> and <code>guidance_scale</code> can have a profound impact
	on the generated (“edited”) image (see <a href="https://twitter.com/RisingSayak/status/1628392199196151808?s=20" rel="nofollow">here</a> for an example).</p> <p data-svelte-h="svelte-xahvpu">If you’re looking for some interesting ways to use the InstructPix2Pix training methodology, we welcome you to check out this blog post: <a href="https://huggingface.co/blog/instruction-tuning-sd" rel="nofollow">Instruction-tuning Stable Diffusion with InstructPix2Pix</a>.</p> <h2 class="relative group"><a id="stable-diffusion-xl" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stable-diffusion-xl"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Stable Diffusion XL</span></h2> <p data-svelte-h="svelte-1g4l1qf">Training with <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">Stable Diffusion XL</a> is also supported via the <code>train_instruct_pix2pix_sdxl.py</code> script. Please refer to the docs <a href="https://github.com/huggingface/diffusers/blob/main/examples/instruct_pix2pix/README_sdxl.md" rel="nofollow">here</a>.</p> <p></p>

	<script>
	{
	// Client-side bootstrap for this page: publishes the path configuration,
	// loads the two entry modules, then calls kit.start() on this script's
	// parent element. The surrounding braces keep the const bindings below
	// block-scoped.

	// Assigned without a declaration, so in this classic (non-module) script it
	// becomes a property of the global object.
	// NOTE(review): presumably read by the entry chunks imported below; the
	// consumer is not visible in this file.
	__sveltekit_ynvcvq = {
	assets: "/docs/diffusers/v0.22.2/en",
	base: "/docs/diffusers/v0.22.2/en",
	env: {}
	};

	// Captured synchronously: document.currentScript is no longer set by the
	// time the asynchronous .then() callback below runs.
	const element = document.currentScript.parentElement;

	// Two null placeholders passed through to kit.start().
	// NOTE(review): looks like one slot per entry in node_ids — confirm.
	const data = [null,null];

	// Fetch both entry modules in parallel; start only once both have resolved.
	Promise.all([
	import("/docs/diffusers/v0.22.2/en/_app/immutable/entry/start.73ea8a3d.js"),
	import("/docs/diffusers/v0.22.2/en/_app/immutable/entry/app.60438fe3.js")
	]).then(([kit, app]) => {
	kit.start(app, element, {
	// NOTE(review): 0 and 140 match the nodes/0.* and nodes/140.* chunks that
	// the document head preloads — presumably the layout and page nodes; confirm.
	node_ids: [0, 140],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size: 37.4 kB
Xet hash: df7155788faee8ef56cd3511756bc2e095ea8ec002e86a9f0da3c6acc9ac9491

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.