Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content='{"title":"ControlNet","local":"controlnet","sections":[{"title":"Installing the dependencies","local":"installing-the-dependencies","sections":[],"depth":2},{"title":"Circle filling dataset","local":"circle-filling-dataset","sections":[],"depth":2},{"title":"Training","local":"training","sections":[],"depth":2},{"title":"Training with multiple GPUs","local":"training-with-multiple-gpus","sections":[],"depth":2},{"title":"Example results","local":"example-results","sections":[{"title":"After 300 steps with batch size 8","local":"after-300-steps-with-batch-size-8","sections":[],"depth":4},{"title":"After 6000 steps with batch size 8:","local":"after-6000-steps-with-batch-size-8","sections":[],"depth":4}],"depth":2},{"title":"Training on a 16 GB GPU","local":"training-on-a-16-gb-gpu","sections":[],"depth":2},{"title":"Training on a 12 GB GPU","local":"training-on-a-12-gb-gpu","sections":[],"depth":2},{"title":"Training on an 8 GB GPU","local":"training-on-an-8-gb-gpu","sections":[],"depth":2},{"title":"Inference","local":"inference","sections":[],"depth":2},{"title":"Stable Diffusion XL","local":"stable-diffusion-xl","sections":[],"depth":2}],"depth":1}'> | |
| <link href="/docs/diffusers/v0.22.2/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/entry/start.73ea8a3d.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/scheduler.182ea377.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/singletons.60172a60.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/index.1f6d62f6.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/paths.49cddc6d.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/entry/app.60438fe3.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/index.abf12888.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/nodes/0.efabe74f.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/nodes/134.0e084f0d.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/Tip.230e2334.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/CodeBlock.57fe6e13.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/Heading.16916d63.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"ControlNet","local":"controlnet","sections":[{"title":"Installing the dependencies","local":"installing-the-dependencies","sections":[],"depth":2},{"title":"Circle filling dataset","local":"circle-filling-dataset","sections":[],"depth":2},{"title":"Training","local":"training","sections":[],"depth":2},{"title":"Training with multiple GPUs","local":"training-with-multiple-gpus","sections":[],"depth":2},{"title":"Example results","local":"example-results","sections":[{"title":"After 300 steps with batch size 8","local":"after-300-steps-with-batch-size-8","sections":[],"depth":4},{"title":"After 6000 steps with batch size 8:","local":"after-6000-steps-with-batch-size-8","sections":[],"depth":4}],"depth":2},{"title":"Training on a 16 GB GPU","local":"training-on-a-16-gb-gpu","sections":[],"depth":2},{"title":"Training on a 12 GB GPU","local":"training-on-a-12-gb-gpu","sections":[],"depth":2},{"title":"Training on an 8 GB GPU","local":"training-on-an-8-gb-gpu","sections":[],"depth":2},{"title":"Inference","local":"inference","sections":[],"depth":2},{"title":"Stable Diffusion XL","local":"stable-diffusion-xl","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="controlnet" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#controlnet"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 
40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ControlNet</span></h1> <p data-svelte-h="svelte-vqe38f"><a href="https://arxiv.org/abs/2302.05543" rel="nofollow">Adding Conditional Control to Text-to-Image Diffusion Models</a> (ControlNet) by Lvmin Zhang and Maneesh Agrawala.</p> <p data-svelte-h="svelte-h8xjkl">This example is based on the <a href="https://github.com/lllyasviel/ControlNet/blob/main/docs/train.md" rel="nofollow">training example in the original ControlNet repository</a>. It trains a ControlNet to fill circles using a <a href="https://huggingface.co/datasets/fusing/fill50k" rel="nofollow">small synthetic dataset</a>.</p> <h2 class="relative group"><a id="installing-the-dependencies" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#installing-the-dependencies"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>Installing the dependencies</span></h2> <p data-svelte-h="svelte-13r6xyj">Before running the scripts, make sure to install the library’s training dependencies.</p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-i634tn">To successfully run the latest versions of the example scripts, we highly recommend <strong>installing from source</strong> and keeping the installation up to date. We update the example scripts frequently and install example-specific requirements.</p></div> <p data-svelte-h="svelte-avpba0">To do this, execute the following steps in a new virtual environment:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: 
transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->git <span class="hljs-built_in">clone</span> https://github.com/huggingface/diffusers | |
| <span class="hljs-built_in">cd</span> diffusers | |
| pip install -e .<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-uwesou">Then navigate into the <a href="https://github.com/huggingface/diffusers/tree/main/examples/controlnet" rel="nofollow">example folder</a></p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">cd</span> examples/controlnet<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-17vzo3f">Now run:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install -r requirements.txt<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1apictl">And initialize an <a href="https://github.com/huggingface/accelerate/" rel="nofollow">🤗Accelerate</a> environment with:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 
transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate config<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ow8iv6">Or for a default 🤗Accelerate configuration without answering questions about your environment:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate config default<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1asek1h">Or if your environment doesn’t support an interactive shell like a notebook:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out 
opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> accelerate.utils <span class="hljs-keyword">import</span> write_basic_config | |
| write_basic_config()<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="circle-filling-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#circle-filling-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Circle filling dataset</span></h2> <p data-svelte-h="svelte-tk7tt3">The original dataset is hosted in the ControlNet <a href="https://huggingface.co/lllyasviel/ControlNet/blob/main/training/fill50k.zip" rel="nofollow">repo</a>, but we re-uploaded it <a href="https://huggingface.co/datasets/fusing/fill50k" rel="nofollow">here</a> to be compatible with 🤗 Datasets so that it can handle the data loading within the training script.</p> <p data-svelte-h="svelte-itxq4h">Our training examples use <a href="https://huggingface.co/runwayml/stable-diffusion-v1-5" rel="nofollow"><code>runwayml/stable-diffusion-v1-5</code></a> because that is what the original set of ControlNet models was trained on. 
However, ControlNet can be trained to augment any compatible Stable Diffusion model (such as <a href="https://huggingface.co/CompVis/stable-diffusion-v1-4" rel="nofollow"><code>CompVis/stable-diffusion-v1-4</code></a> or <a href="https://huggingface.co/stabilityai/stable-diffusion-2-1" rel="nofollow"><code>stabilityai/stable-diffusion-2-1</code></a>).</p> <p data-svelte-h="svelte-19a7h78">To use your own dataset, take a look at the <a href="create_dataset">Create a dataset for training</a> guide.</p> <h2 class="relative group"><a id="training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training</span></h2> <p data-svelte-h="svelte-hvzp9c">Download the following images to condition our training with:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" 
fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->wget https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_1.png | |
| wget https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_2.png<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-sj32mn">Specify the <code>MODEL_DIR</code> environment variable (either a Hub model repository id or a path to the directory containing the model weights) and pass it to the <a href="https://huggingface.co/docs/diffusers/en/api/diffusion_pipeline#diffusers.DiffusionPipeline.from_pretrained.pretrained_model_name_or_path" rel="nofollow"><code>pretrained_model_name_or_path</code></a> argument.</p> <p data-svelte-h="svelte-l3tgq2">The training script creates and saves a <code>diffusion_pytorch_model.bin</code> file in your repository.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> 
MODEL_DIR=<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path to save model"</span> | |
| accelerate launch train_controlnet.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --dataset_name=fusing/fill50k \ | |
| --resolution=512 \ | |
| --learning_rate=1e-5 \ | |
| --validation_image <span class="hljs-string">"./conditioning_image_1.png"</span> <span class="hljs-string">"./conditioning_image_2.png"</span> \ | |
| --validation_prompt <span class="hljs-string">"red circle with blue background"</span> <span class="hljs-string">"cyan circle with brown floral background"</span> \ | |
| --train_batch_size=4 \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-j8njrp">This default configuration requires ~38 GB VRAM.</p> <p data-svelte-h="svelte-3pyfnn">By default, the training script logs outputs to TensorBoard. Pass <code>--report_to wandb</code> to use Weights &amp; | |
| Biases.</p> <p data-svelte-h="svelte-1uie82a">Gradient accumulation with a smaller batch size can be used to reduce training requirements to ~20 GB VRAM.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_DIR=<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path to save model"</span> | |
| accelerate launch train_controlnet.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --dataset_name=fusing/fill50k \ | |
| --resolution=512 \ | |
| --learning_rate=1e-5 \ | |
| --validation_image <span class="hljs-string">"./conditioning_image_1.png"</span> <span class="hljs-string">"./conditioning_image_2.png"</span> \ | |
| --validation_prompt <span class="hljs-string">"red circle with blue background"</span> <span class="hljs-string">"cyan circle with brown floral background"</span> \ | |
| --train_batch_size=1 \ | |
| --gradient_accumulation_steps=4 \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="training-with-multiple-gpus" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-with-multiple-gpus"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training with multiple GPUs</span></h2> <p data-svelte-h="svelte-1tkye3l"><code>accelerate</code> allows for seamless multi-GPU training. Follow the instructions <a href="https://huggingface.co/docs/accelerate/basic_tutorials/launch" rel="nofollow">here</a> | |
| for running distributed training with <code>accelerate</code>. Here is an example command:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_DIR=<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path to save model"</span> | |
| accelerate launch --mixed_precision=<span class="hljs-string">"fp16"</span> --multi_gpu train_controlnet.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --dataset_name=fusing/fill50k \ | |
| --resolution=512 \ | |
| --learning_rate=1e-5 \ | |
| --validation_image <span class="hljs-string">"./conditioning_image_1.png"</span> <span class="hljs-string">"./conditioning_image_2.png"</span> \ | |
| --validation_prompt <span class="hljs-string">"red circle with blue background"</span> <span class="hljs-string">"cyan circle with brown floral background"</span> \ | |
| --train_batch_size=4 \ | |
| --mixed_precision=<span class="hljs-string">"fp16"</span> \ | |
| --tracker_project_name=<span class="hljs-string">"controlnet-demo"</span> \ | |
| --report_to=wandb \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="example-results" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-results"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example results</span></h2> <h4 class="relative group"><a id="after-300-steps-with-batch-size-8" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#after-300-steps-with-batch-size-8"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 
0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>After 300 steps with batch size 8</span></h4> <table data-svelte-h="svelte-626s6m"><thead><tr><th></th> <th align="center"></th></tr></thead> <tbody><tr><td></td> <td align="center">red circle with blue background</td></tr> <tr><td><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_1.png" alt="conditioning image"></td> <td align="center"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/red_circle_with_blue_background_300_steps.png" alt="red circle with blue background"></td></tr> <tr><td></td> <td align="center">cyan circle with brown floral background</td></tr> <tr><td><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_2.png" alt="conditioning image"></td> <td align="center"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/cyan_circle_with_brown_floral_background_300_steps.png" alt="cyan circle with brown floral background"></td></tr></tbody></table> <h4 class="relative group"><a id="after-6000-steps-with-batch-size-8" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#after-6000-steps-with-batch-size-8"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 
1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>After 6000 steps with batch size 8:</span></h4> <table data-svelte-h="svelte-15uqe2y"><thead><tr><th></th> <th align="center"></th></tr></thead> <tbody><tr><td></td> <td align="center">red circle with blue background</td></tr> <tr><td><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_1.png" alt="conditioning image"></td> <td align="center"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/red_circle_with_blue_background_6000_steps.png" alt="red circle with blue background"></td></tr> <tr><td></td> <td align="center">cyan circle with brown floral background</td></tr> <tr><td><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/conditioning_image_2.png" alt="conditioning image"></td> <td align="center"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/controlnet_training/cyan_circle_with_brown_floral_background_6000_steps.png" alt="cyan circle with brown floral background"></td></tr></tbody></table> <h2 class="relative group"><a id="training-on-a-16-gb-gpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-on-a-16-gb-gpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training on a 16 GB GPU</span></h2> <p data-svelte-h="svelte-srwe1s">Enable the following optimizations to train on a 16GB GPU:</p> <ul data-svelte-h="svelte-1ud41h2"><li>Gradient checkpointing</li> <li>bitsandbytes’ 8-bit optimizer (take a look at the <a href="https://github.com/TimDettmers/bitsandbytes#requirements--installation" rel="nofollow">installation</a> instructions if you don’t already have it installed)</li></ul> <p data-svelte-h="svelte-xz1hns">Now you can launch the training script:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none 
transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_DIR=<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path to save model"</span> | |
| accelerate launch train_controlnet.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --dataset_name=fusing/fill50k \ | |
| --resolution=512 \ | |
| --learning_rate=1e-5 \ | |
| --validation_image <span class="hljs-string">"./conditioning_image_1.png"</span> <span class="hljs-string">"./conditioning_image_2.png"</span> \ | |
| --validation_prompt <span class="hljs-string">"red circle with blue background"</span> <span class="hljs-string">"cyan circle with brown floral background"</span> \ | |
| --train_batch_size=1 \ | |
| --gradient_accumulation_steps=4 \ | |
| --gradient_checkpointing \ | |
| --use_8bit_adam \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="training-on-a-12-gb-gpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-on-a-12-gb-gpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training on a 12 GB GPU</span></h2> <p data-svelte-h="svelte-gjmxa4">Enable the following optimizations to train on a 12GB GPU:</p> <ul data-svelte-h="svelte-fhxfez"><li>Gradient checkpointing</li> <li>bitsandbytes’ 8-bit optimizer (take a look at the <a href="https://github.com/TimDettmers/bitsandbytes#requirements--installation" rel="nofollow">installation</a> instructions if you don’t already have it installed)</li> <li>xFormers (take a look at the <a href="https://huggingface.co/docs/diffusers/training/optimization/xformers" rel="nofollow">installation</a> instructions if you don’t already have it installed)</li> <li>set gradients to <code>None</code></li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer 
focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_DIR=<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path to save model"</span> | |
| accelerate launch train_controlnet.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --dataset_name=fusing/fill50k \ | |
| --resolution=512 \ | |
| --learning_rate=1e-5 \ | |
| --validation_image <span class="hljs-string">"./conditioning_image_1.png"</span> <span class="hljs-string">"./conditioning_image_2.png"</span> \ | |
| --validation_prompt <span class="hljs-string">"red circle with blue background"</span> <span class="hljs-string">"cyan circle with brown floral background"</span> \ | |
| --train_batch_size=1 \ | |
| --gradient_accumulation_steps=4 \ | |
| --gradient_checkpointing \ | |
| --use_8bit_adam \ | |
| --enable_xformers_memory_efficient_attention \ | |
| --set_grads_to_none \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-8c8k6u">When using <code>enable_xformers_memory_efficient_attention</code>, please make sure to install <code>xformers</code> by <code>pip install xformers</code>.</p> <h2 class="relative group"><a id="training-on-an-8-gb-gpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-on-an-8-gb-gpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training on an 8 GB GPU</span></h2> <p data-svelte-h="svelte-178qs1f">We have not exhaustively tested DeepSpeed support for ControlNet. While the configuration does | |
| save memory, we have not confirmed whether the configuration trains successfully. You will very likely | |
| have to make changes to the config to have a successful training run.</p> <p data-svelte-h="svelte-mrrfah">Enable the following optimizations to train on an 8GB GPU:</p> <ul data-svelte-h="svelte-1u1ac16"><li>Gradient checkpointing</li> <li>bitsandbytes’ 8-bit optimizer (take a look at the <a href="https://github.com/TimDettmers/bitsandbytes#requirements--installation" rel="nofollow">installation</a> instructions if you don’t already have it installed)</li> <li>xFormers (take a look at the <a href="https://huggingface.co/docs/diffusers/training/optimization/xformers" rel="nofollow">installation</a> instructions if you don’t already have it installed)</li> <li>set gradients to <code>None</code></li> <li>DeepSpeed stage 2 with parameter and optimizer offloading</li> <li>fp16 mixed precision</li></ul> <p data-svelte-h="svelte-17z57jf"><a href="https://www.deepspeed.ai/" rel="nofollow">DeepSpeed</a> can offload tensors from VRAM to either | |
| CPU or NVME. This requires significantly more RAM (about 25 GB).</p> <p data-svelte-h="svelte-nyyim6">You’ll have to configure your environment with <code>accelerate config</code> to enable DeepSpeed stage 2.</p> <p data-svelte-h="svelte-120hras">The configuration file should look like this:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-attr">compute_environment:</span> <span class="hljs-string">LOCAL_MACHINE</span> | |
| <span class="hljs-attr">deepspeed_config:</span> | |
| <span class="hljs-attr">gradient_accumulation_steps:</span> <span class="hljs-number">4</span> | |
| <span class="hljs-attr">offload_optimizer_device:</span> <span class="hljs-string">cpu</span> | |
| <span class="hljs-attr">offload_param_device:</span> <span class="hljs-string">cpu</span> | |
| <span class="hljs-attr">zero3_init_flag:</span> <span class="hljs-literal">false</span> | |
| <span class="hljs-attr">zero_stage:</span> <span class="hljs-number">2</span> | |
| <span class="hljs-attr">distributed_type:</span> <span class="hljs-string">DEEPSPEED</span><!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1i2ijui">See <a href="https://huggingface.co/docs/accelerate/usage_guides/deepspeed" rel="nofollow">documentation</a> for more DeepSpeed configuration options.</p></div> <p data-svelte-h="svelte-1x09dzx">Changing the default Adam optimizer to DeepSpeed’s Adam | |
| <code>deepspeed.ops.adam.DeepSpeedCPUAdam</code> gives a substantial speedup but | |
| it requires a CUDA toolchain with the same version as PyTorch. 8-bit optimizer | |
| does not seem to be compatible with DeepSpeed at the moment.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_DIR=<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path to save model"</span> | |
| accelerate launch train_controlnet.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --dataset_name=fusing/fill50k \ | |
| --resolution=512 \ | |
| --validation_image <span class="hljs-string">"./conditioning_image_1.png"</span> <span class="hljs-string">"./conditioning_image_2.png"</span> \ | |
| --validation_prompt <span class="hljs-string">"red circle with blue background"</span> <span class="hljs-string">"cyan circle with brown floral background"</span> \ | |
| --train_batch_size=1 \ | |
| --gradient_accumulation_steps=4 \ | |
| --gradient_checkpointing \ | |
| --enable_xformers_memory_efficient_attention \ | |
| --set_grads_to_none \ | |
| --mixed_precision fp16 \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference</span></h2> <p data-svelte-h="svelte-21zqpa">The trained model can be run with the <a href="/docs/diffusers/v0.22.2/en/api/pipelines/controlnet#diffusers.StableDiffusionControlNetPipeline">StableDiffusionControlNetPipeline</a>. | |
| Set <code>base_model_path</code> and <code>controlnet_path</code> to the values <code>--pretrained_model_name_or_path</code> and | |
| <code>--output_dir</code> were respectively set to in the training script.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler | |
| <span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> load_image | |
| <span class="hljs-keyword">import</span> torch | |
| base_model_path = <span class="hljs-string">"path to model"</span> | |
| controlnet_path = <span class="hljs-string">"path to controlnet"</span> | |
| controlnet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>) | |
| pipe = StableDiffusionControlNetPipeline.from_pretrained( | |
| base_model_path, controlnet=controlnet, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span> | |
| ) | |
| <span class="hljs-comment"># speed up diffusion process with faster scheduler and memory optimization</span> | |
| pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config) | |
| <span class="hljs-comment"># remove following line if xformers is not installed</span> | |
| pipe.enable_xformers_memory_efficient_attention() | |
| pipe.enable_model_cpu_offload() | |
| control_image = load_image(<span class="hljs-string">"./conditioning_image_1.png"</span>) | |
| prompt = <span class="hljs-string">"pale golden rod circle with old lace background"</span> | |
| <span class="hljs-comment"># generate image</span> | |
| generator = torch.manual_seed(<span class="hljs-number">0</span>) | |
| image = pipe(prompt, num_inference_steps=<span class="hljs-number">20</span>, generator=generator, image=control_image).images[<span class="hljs-number">0</span>] | |
| image.save(<span class="hljs-string">"./output.png"</span>)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="stable-diffusion-xl" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stable-diffusion-xl"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Stable Diffusion XL</span></h2> <p data-svelte-h="svelte-1jon7xv">Training with <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">Stable Diffusion XL</a> is also supported via the <code>train_controlnet_sdxl.py</code> script. Please refer to the docs <a href="https://github.com/huggingface/diffusers/blob/main/examples/controlnet/README_sdxl.md" rel="nofollow">here</a>.</p> <p></p> | |
| <script> | |
// Page bootstrap: publishes the docs path settings on a global, then loads the
// two client entry modules and calls kit.start() with the mount element.
// NOTE(review): the __sveltekit_* global and the kit.start(...) call look like
// framework-generated (SvelteKit) start-up code - confirm before hand-editing.
| { | |
// Assigned without a declaration keyword, so this creates/overwrites a global.
// `assets` and `base` both point at the versioned docs root (v0.22.2/en).
| __sveltekit_ynvcvq = { | |
| assets: "/docs/diffusers/v0.22.2/en", | |
| base: "/docs/diffusers/v0.22.2/en", | |
| env: {} | |
| }; | |
// Parent element of the currently executing script tag; handed to kit.start()
// below as its second argument. It is read here, synchronously, rather than
// inside the promise callback.
| const element = document.currentScript.parentElement; | |
// Two null entries, forwarded unchanged to kit.start() as `data`.
| const data = [null,null]; | |
// Request both entry modules at once; the callback runs only after both
// dynamic imports have resolved.
| Promise.all([ | |
| import("/docs/diffusers/v0.22.2/en/_app/immutable/entry/start.73ea8a3d.js"), | |
| import("/docs/diffusers/v0.22.2/en/_app/immutable/entry/app.60438fe3.js") | |
| ]).then(([kit, app]) => { | |
// `kit` is the start.*.js module and `app` the app.*.js module, in array order.
| kit.start(app, element, { | |
// NOTE(review): presumably indexes of the layout/page nodes to load - verify
// against the generated app manifest.
| node_ids: [0, 134], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 59.2 kB
- Xet hash:
- 054b5245a210153deaa41b067ccdfbfe5535f8c66129bb82a050b7193dac1758
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.