<h1 class="relative group"><a id="controlnet" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#controlnet"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>ControlNet
</span></h1>
<p><a href="https://arxiv.org/abs/2302.05543" rel="nofollow">Adding Conditional Control to Text-to-Image Diffusion Models</a> (ControlNet) by Lvmin Zhang and Maneesh Agrawala.</p>
<p>This example is based on the <a href="https://github.com/lllyasviel/ControlNet/blob/main/docs/train.md" rel="nofollow">training example in the original ControlNet repository</a>. It trains a ControlNet to fill circles using a <a href="https://huggingface.co/datasets/fusing/fill50k" rel="nofollow">small synthetic dataset</a>.</p>
<h2 class="relative group"><a id="installing-the-dependencies" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#installing-the-dependencies"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Installing the dependencies
</span></h2>
Before running the scripts, make sure to install the library's training dependencies.

<Tip>

To successfully run the latest versions of the example scripts, we highly recommend **installing from source** and keeping the installation up to date. We update the example scripts frequently and install example-specific requirements.

</Tip>

To do this, execute the following steps in a new virtual environment:
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START -->git <span class="hljs-built_in">clone</span> https://github.com/huggingface/diffusers
<span class="hljs-built_in">cd</span> diffusers
pip install -e .<!-- HTML_TAG_END --></pre></div>
Then navigate into the [example folder](https://github.com/huggingface/diffusers/tree/main/examples/controlnet):

```bash
cd examples/controlnet
```

Now run:

```bash
pip install -r requirements.txt
```

And initialize an [🤗 Accelerate](https://github.com/huggingface/accelerate/) environment with:

```bash
accelerate config
```

Or for a default 🤗 Accelerate configuration without answering questions about your environment:

```bash
accelerate config default
```

Or if your environment doesn't support an interactive shell, like a notebook:

```python
from accelerate.utils import write_basic_config

write_basic_config()
```
<h2 class="relative group"><a id="circle-filling-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#circle-filling-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Circle filling dataset
</span></h2>
The original dataset is hosted in the ControlNet [repo](https://huggingface.co/lllyasviel/ControlNet/blob/main/training/fill50k.zip), but we re-uploaded it [here](https://huggingface.co/datasets/fusing/fill50k) to be compatible with 🤗 Datasets so that it can handle the data loading within the training script.

Our training examples use [`runwayml/stable-diffusion-v1-5`](https://huggingface.co/runwayml/stable-diffusion-v1-5) because that is what the original set of ControlNet models was trained on. However, ControlNet can be trained to augment any compatible Stable Diffusion model, such as [`CompVis/stable-diffusion-v1-4`](https://huggingface.co/CompVis/stable-diffusion-v1-4) or [`stabilityai/stable-diffusion-2-1`](https://huggingface.co/stabilityai/stable-diffusion-2-1).

To use your own dataset, take a look at the [Create a dataset for training](create_dataset) guide.
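If you want to peek at the circle-filling data before training, you can load it directly with 🤗 Datasets. The snippet below is a small optional sketch; the column names (`image`, `conditioning_image`, `text`) should match the training script's defaults, but verify them against the dataset card if you swap in your own dataset.

```python
# Optional sanity check of the circle-filling dataset with 🤗 Datasets.
from datasets import load_dataset

dataset = load_dataset("fusing/fill50k", split="train")

print(dataset)                  # number of rows and column names
example = dataset[0]
print(example["text"])          # caption describing the circle and background colors
example["image"].save("sample_image.png")
example["conditioning_image"].save("sample_conditioning.png")
```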
<h2 class="relative group"><a id="training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Training
</span></h2>
<p>Download the following images to condition our training with:</p>
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START -->wget https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_1.png
wget https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_2.png<!-- HTML_TAG_END --></pre></div>
Specify the `MODEL_DIR` environment variable (either a Hub model repository id or a path to the directory containing the model weights) and pass it to the [`pretrained_model_name_or_path`](https://huggingface.co/docs/diffusers/en/api/diffusion_pipeline#diffusers.DiffusionPipeline.from_pretrained.pretrained_model_name_or_path) argument.

The training script creates and saves a `diffusion_pytorch_model.bin` file in your repository.
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_DIR=<span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>
<span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">&quot;path to save model&quot;</span>
accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_DIR</span> \
--output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \
--dataset_name=fusing/fill50k \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image <span class="hljs-string">&quot;./conditioning_image_1.png&quot;</span> <span class="hljs-string">&quot;./conditioning_image_2.png&quot;</span> \
--validation_prompt <span class="hljs-string">&quot;red circle with blue background&quot;</span> <span class="hljs-string">&quot;cyan circle with brown floral background&quot;</span> \
--train_batch_size=4 \
--push_to_hub<!-- HTML_TAG_END --></pre></div>
This default configuration requires ~38 GB of VRAM.

By default, the training script logs outputs to TensorBoard. Pass `--report_to wandb` to use Weights & Biases instead.

Gradient accumulation with a smaller batch size can be used to reduce the training memory requirement to ~20 GB of VRAM.
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_DIR=<span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>
<span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">&quot;path to save model&quot;</span>
accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_DIR</span> \
--output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \
--dataset_name=fusing/fill50k \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image <span class="hljs-string">&quot;./conditioning_image_1.png&quot;</span> <span class="hljs-string">&quot;./conditioning_image_2.png&quot;</span> \
--validation_prompt <span class="hljs-string">&quot;red circle with blue background&quot;</span> <span class="hljs-string">&quot;cyan circle with brown floral background&quot;</span> \
--train_batch_size=1 \
--gradient_accumulation_steps=4 \
--push_to_hub<!-- HTML_TAG_END --></pre></div>
<h2 class="relative group"><a id="training-with-multiple-gpus" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-with-multiple-gpus"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Training with multiple GPUs
</span></h2>
`accelerate` allows for seamless multi-GPU training. Follow the instructions [here](https://huggingface.co/docs/accelerate/basic_tutorials/launch) for running distributed training with `accelerate`. Here is an example command:
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_DIR=<span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>
<span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">&quot;path to save model&quot;</span>
accelerate launch --mixed_precision=<span class="hljs-string">&quot;fp16&quot;</span> --multi_gpu train_controlnet.py \
--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_DIR</span> \
--output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \
--dataset_name=fusing/fill50k \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image <span class="hljs-string">&quot;./conditioning_image_1.png&quot;</span> <span class="hljs-string">&quot;./conditioning_image_2.png&quot;</span> \
--validation_prompt <span class="hljs-string">&quot;red circle with blue background&quot;</span> <span class="hljs-string">&quot;cyan circle with brown floral background&quot;</span> \
--train_batch_size=4 \
--mixed_precision=<span class="hljs-string">&quot;fp16&quot;</span> \
--tracker_project_name=<span class="hljs-string">&quot;controlnet-demo&quot;</span> \
--report_to=wandb \
--push_to_hub<!-- HTML_TAG_END --></pre></div>
<h2 class="relative group"><a id="example-results" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-results"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Example results
</span></h2>
<h4 class="relative group"><a id="after-300-steps-with-batch-size-8" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#after-300-steps-with-batch-size-8"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>After 300 steps with batch size 8
</span></h4>
|   |   |
|---|:---:|
|   | red circle with blue background |
| ![conditioning image](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_1.png) | ![red circle with blue background](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/red_circle_with_blue_background_300_steps.png) |
|   | cyan circle with brown floral background |
| ![conditioning image](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_2.png) | ![cyan circle with brown floral background](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/cyan_circle_with_brown_floral_background_300_steps.png) |
<h4 class="relative group"><a id="after-6000-steps-with-batch-size-8" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#after-6000-steps-with-batch-size-8"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>After 6000 steps with batch size 8:
</span></h4>
|   |   |
|---|:---:|
|   | red circle with blue background |
| ![conditioning image](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_1.png) | ![red circle with blue background](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/red_circle_with_blue_background_6000_steps.png) |
|   | cyan circle with brown floral background |
| ![conditioning image](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_2.png) | ![cyan circle with brown floral background](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/cyan_circle_with_brown_floral_background_6000_steps.png) |
<h2 class="relative group"><a id="training-on-a-16-gb-gpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-on-a-16-gb-gpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Training on a 16 GB GPU
</span></h2>
Enable the following optimizations to train on a 16 GB GPU:

- Gradient checkpointing
- bitsandbytes' 8-bit optimizer (take a look at the [installation instructions](https://github.com/TimDettmers/bitsandbytes#requirements--installation) if you don't already have it installed)
Now you can launch the training script:

```bash
export MODEL_DIR="runwayml/stable-diffusion-v1-5"
export OUTPUT_DIR="path to save model"

accelerate launch train_controlnet.py \
 --pretrained_model_name_or_path=$MODEL_DIR \
 --output_dir=$OUTPUT_DIR \
 --dataset_name=fusing/fill50k \
 --resolution=512 \
 --learning_rate=1e-5 \
 --validation_image "./conditioning_image_1.png" "./conditioning_image_2.png" \
 --validation_prompt "red circle with blue background" "cyan circle with brown floral background" \
 --train_batch_size=1 \
 --gradient_accumulation_steps=4 \
 --gradient_checkpointing \
 --use_8bit_adam \
 --push_to_hub
```
<h2 class="relative group"><a id="training-on-a-12-gb-gpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-on-a-12-gb-gpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Training on a 12 GB GPU
</span></h2>
Enable the following optimizations to train on a 12 GB GPU:

- Gradient checkpointing
- bitsandbytes' 8-bit optimizer (take a look at the [installation instructions](https://github.com/TimDettmers/bitsandbytes#requirements--installation) if you don't already have it installed)
- xFormers (take a look at the [installation instructions](https://huggingface.co/docs/diffusers/training/optimization/xformers) if you don't already have it installed)
- setting gradients to `None`
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_DIR=<span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>
<span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">&quot;path to save model&quot;</span>
accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_DIR</span> \
--output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \
--dataset_name=fusing/fill50k \
--resolution=512 \
--learning_rate=1e-5 \
--validation_image <span class="hljs-string">&quot;./conditioning_image_1.png&quot;</span> <span class="hljs-string">&quot;./conditioning_image_2.png&quot;</span> \
--validation_prompt <span class="hljs-string">&quot;red circle with blue background&quot;</span> <span class="hljs-string">&quot;cyan circle with brown floral background&quot;</span> \
--train_batch_size=1 \
--gradient_accumulation_steps=4 \
--gradient_checkpointing \
--use_8bit_adam \
--enable_xformers_memory_efficient_attention \
--set_grads_to_none \
--push_to_hub<!-- HTML_TAG_END --></pre></div>
When using `enable_xformers_memory_efficient_attention`, make sure to install `xformers` with `pip install xformers`.
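If you're not sure whether xFormers is usable in your environment, a quick check before launching training can save a failed run. This is a small optional sketch; it assumes the `is_xformers_available` helper exported from `diffusers.utils` in recent releases.

```python
# Minimal sketch: verify that xFormers can be imported before passing
# --enable_xformers_memory_efficient_attention to the training script.
from diffusers.utils import is_xformers_available

if is_xformers_available():
    print("xFormers is installed; memory-efficient attention can be enabled.")
else:
    print("xFormers not found; install it with `pip install xformers` or drop the flag.")
```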
<h2 class="relative group"><a id="training-on-an-8-gb-gpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-on-an-8-gb-gpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Training on an 8 GB GPU
</span></h2>
We have not exhaustively tested DeepSpeed support for ControlNet. While the configuration does save memory, we have not confirmed whether it trains successfully. You will very likely have to make changes to the config to have a successful training run.
Enable the following optimizations to train on an 8 GB GPU:

- Gradient checkpointing
- bitsandbytes' 8-bit optimizer (take a look at the [installation instructions](https://github.com/TimDettmers/bitsandbytes#requirements--installation) if you don't already have it installed)
- xFormers (take a look at the [installation instructions](https://huggingface.co/docs/diffusers/training/optimization/xformers) if you don't already have it installed)
- setting gradients to `None`
- DeepSpeed stage 2 with parameter and optimizer offloading
- fp16 mixed precision
<p><a href="https://www.deepspeed.ai/" rel="nofollow">DeepSpeed</a> can offload tensors from VRAM to either
CPU or NVME. This requires significantly more RAM (about 25 GB).</p>
<p>You’ll have to configure your environment with <code>accelerate config</code> to enable DeepSpeed stage 2.</p>
<p>The configuration file should look like this:</p>
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START --><span class="hljs-attr">compute_environment:</span> <span class="hljs-string">LOCAL_MACHINE</span>
<span class="hljs-attr">deepspeed_config:</span>
<span class="hljs-attr">gradient_accumulation_steps:</span> <span class="hljs-number">4</span>
<span class="hljs-attr">offload_optimizer_device:</span> <span class="hljs-string">cpu</span>
<span class="hljs-attr">offload_param_device:</span> <span class="hljs-string">cpu</span>
<span class="hljs-attr">zero3_init_flag:</span> <span class="hljs-literal">false</span>
<span class="hljs-attr">zero_stage:</span> <span class="hljs-number">2</span>
<span class="hljs-attr">distributed_type:</span> <span class="hljs-string">DEEPSPEED</span><!-- HTML_TAG_END --></pre></div>
<Tip>

See the [documentation](https://huggingface.co/docs/accelerate/usage_guides/deepspeed) for more DeepSpeed configuration options.

</Tip>
Changing the default Adam optimizer to DeepSpeed's Adam, `deepspeed.ops.adam.DeepSpeedCPUAdam`, gives a substantial speedup, but it requires a CUDA toolchain with the same version as PyTorch. The 8-bit optimizer does not seem to be compatible with DeepSpeed at the moment.
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_DIR=<span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>
<span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">&quot;path to save model&quot;</span>
accelerate launch train_controlnet.py \
--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_DIR</span> \
--output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \
--dataset_name=fusing/fill50k \
--resolution=512 \
--validation_image <span class="hljs-string">&quot;./conditioning_image_1.png&quot;</span> <span class="hljs-string">&quot;./conditioning_image_2.png&quot;</span> \
--validation_prompt <span class="hljs-string">&quot;red circle with blue background&quot;</span> <span class="hljs-string">&quot;cyan circle with brown floral background&quot;</span> \
--train_batch_size=1 \
--gradient_accumulation_steps=4 \
--gradient_checkpointing \
--enable_xformers_memory_efficient_attention \
--set_grads_to_none \
--mixed_precision fp16 \
--push_to_hub<!-- HTML_TAG_END --></pre></div>
<h2 class="relative group"><a id="inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Inference
</span></h2>
The trained model can be run with the [StableDiffusionControlNetPipeline](/docs/diffusers/v0.19.2/en/api/pipelines/controlnet#diffusers.StableDiffusionControlNetPipeline). Set `base_model_path` and `controlnet_path` to the values of `--pretrained_model_name_or_path` and `--output_dir`, respectively, from the training script.
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image
<span class="hljs-keyword">import</span> torch
base_model_path = <span class="hljs-string">&quot;path to model&quot;</span>
controlnet_path = <span class="hljs-string">&quot;path to controlnet&quot;</span>
controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16)
pipe = StableDiffusionControlNetPipeline.from_pretrained(
base_model_path, controlnet=controlnet, torch_dtype=torch.float16
)
<span class="hljs-comment"># speed up diffusion process with faster scheduler and memory optimization</span>
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
<span class="hljs-comment"># remove following line if xformers is not installed</span>
pipe.enable_xformers_memory_efficient_attention()
pipe.enable_model_cpu_offload()
control_image = load_image(<span class="hljs-string">&quot;./conditioning_image_1.png&quot;</span>)
prompt = <span class="hljs-string">&quot;pale golden rod circle with old lace background&quot;</span>
<span class="hljs-comment"># generate image</span>
generator = torch.manual_seed(<span class="hljs-number">0</span>)
image = pipe(prompt, num_inference_steps=<span class="hljs-number">20</span>, generator=generator, image=control_image).images[<span class="hljs-number">0</span>]
image.save(<span class="hljs-string">&quot;./output.png&quot;</span>)<!-- HTML_TAG_END --></pre></div>
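To spot-check the trained ControlNet on both validation pairs used during training, you can reuse the same pipeline in a loop. The sketch below assumes the `pipe` object from the snippet above has already been constructed and that the two conditioning images downloaded in the Training section are in the working directory.

```python
# Run the pipeline over both training-time validation pairs
# (assumes `pipe` from the previous snippet and the downloaded conditioning images).
from diffusers.utils import load_image
import torch

validation_pairs = [
    ("./conditioning_image_1.png", "red circle with blue background"),
    ("./conditioning_image_2.png", "cyan circle with brown floral background"),
]

for idx, (image_path, prompt) in enumerate(validation_pairs):
    control_image = load_image(image_path)
    generator = torch.manual_seed(0)  # fixed seed keeps runs comparable
    image = pipe(prompt, num_inference_steps=20, generator=generator, image=control_image).images[0]
    image.save(f"./output_{idx}.png")
```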
<script type="module" data-hydrate="1fli3h7">
import { start } from "/docs/diffusers/v0.19.2/en/_app/start-hf-doc-builder.js";
start({
target: document.querySelector('[data-hydrate="1fli3h7"]').parentNode,
paths: {"base":"/docs/diffusers/v0.19.2/en","assets":"/docs/diffusers/v0.19.2/en"},
session: {},
route: false,
spa: false,
trailing_slash: "never",
hydrate: {
status: 200,
error: null,
nodes: [
import("/docs/diffusers/v0.19.2/en/_app/pages/__layout.svelte-hf-doc-builder.js"),
import("/docs/diffusers/v0.19.2/en/_app/pages/training/controlnet.mdx-hf-doc-builder.js")
],
params: {}
}
});
</script>

Xet Storage Details

Size:
53.2 kB
·
Xet hash:
47f80034fa6577966d54ce76f7a5610e35cfabc1a0757d645dbbb98a865a1b36

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.