Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"DreamBooth","local":"dreambooth","sections":[{"title":"Finetuning","local":"finetuning","sections":[],"depth":2},{"title":"Finetuning with prior-preserving loss","local":"finetuning-with-prior-preserving-loss","sections":[],"depth":2},{"title":"Finetuning the text encoder and UNet","local":"finetuning-the-text-encoder-and-unet","sections":[],"depth":2},{"title":"Finetuning with LoRA","local":"finetuning-with-lora","sections":[],"depth":2},{"title":"Saving checkpoints while training","local":"saving-checkpoints-while-training","sections":[{"title":"Resume training from a saved checkpoint","local":"resume-training-from-a-saved-checkpoint","sections":[],"depth":3},{"title":"Inference from a saved checkpoint","local":"inference-from-a-saved-checkpoint","sections":[],"depth":3}],"depth":2},{"title":"Optimizations for different GPU sizes","local":"optimizations-for-different-gpu-sizes","sections":[{"title":"xFormers","local":"xformers","sections":[],"depth":3},{"title":"Set gradients to none","local":"set-gradients-to-none","sections":[],"depth":3},{"title":"16GB GPU","local":"16gb-gpu","sections":[],"depth":3},{"title":"12GB GPU","local":"12gb-gpu","sections":[],"depth":3},{"title":"8 GB GPU","local":"8-gb-gpu","sections":[],"depth":3}],"depth":2},{"title":"Inference","local":"inference","sections":[],"depth":2},{"title":"IF","local":"if","sections":[{"title":"Tips and Tricks","local":"tips-and-tricks","sections":[],"depth":3},{"title":"Stage II additional validation images","local":"stage-ii-additional-validation-images","sections":[],"depth":3},{"title":"IF stage I LoRA Dreambooth","local":"if-stage-i-lora-dreambooth","sections":[],"depth":3},{"title":"IF stage II LoRA Dreambooth","local":"if-stage-ii-lora-dreambooth","sections":[],"depth":3},{"title":"IF Stage I Full Dreambooth","local":"if-stage-i-full-dreambooth","sections":[],"depth":3},{"title":"IF Stage II Full Dreambooth","local":"if-stage-ii-full-dreambooth","sections":[],"depth":3}],"depth":2},{"title":"Stable Diffusion XL","local":"stable-diffusion-xl","sections":[],"depth":2}],"depth":1}"> | |
| <link href="/docs/diffusers/v0.22.2/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/entry/start.73ea8a3d.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/scheduler.182ea377.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/singletons.60172a60.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/index.1f6d62f6.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/paths.49cddc6d.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/entry/app.60438fe3.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/index.abf12888.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/nodes/0.efabe74f.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/nodes/139.57db7bcf.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/Tip.230e2334.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/CodeBlock.57fe6e13.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/Markdown.7a59e0fd.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/globals.7f7f1b26.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/stores.aa51e67d.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/Heading.16916d63.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"DreamBooth","local":"dreambooth","sections":[{"title":"Finetuning","local":"finetuning","sections":[],"depth":2},{"title":"Finetuning with prior-preserving loss","local":"finetuning-with-prior-preserving-loss","sections":[],"depth":2},{"title":"Finetuning the text encoder and UNet","local":"finetuning-the-text-encoder-and-unet","sections":[],"depth":2},{"title":"Finetuning with LoRA","local":"finetuning-with-lora","sections":[],"depth":2},{"title":"Saving checkpoints while training","local":"saving-checkpoints-while-training","sections":[{"title":"Resume training from a saved checkpoint","local":"resume-training-from-a-saved-checkpoint","sections":[],"depth":3},{"title":"Inference from a saved checkpoint","local":"inference-from-a-saved-checkpoint","sections":[],"depth":3}],"depth":2},{"title":"Optimizations for different GPU sizes","local":"optimizations-for-different-gpu-sizes","sections":[{"title":"xFormers","local":"xformers","sections":[],"depth":3},{"title":"Set gradients to none","local":"set-gradients-to-none","sections":[],"depth":3},{"title":"16GB GPU","local":"16gb-gpu","sections":[],"depth":3},{"title":"12GB GPU","local":"12gb-gpu","sections":[],"depth":3},{"title":"8 GB GPU","local":"8-gb-gpu","sections":[],"depth":3}],"depth":2},{"title":"Inference","local":"inference","sections":[],"depth":2},{"title":"IF","local":"if","sections":[{"title":"Tips and Tricks","local":"tips-and-tricks","sections":[],"depth":3},{"title":"Stage II additional validation images","local":"stage-ii-additional-validation-images","sections":[],"depth":3},{"title":"IF stage I LoRA Dreambooth","local":"if-stage-i-lora-dreambooth","sections":[],"depth":3},{"title":"IF stage II LoRA Dreambooth","local":"if-stage-ii-lora-dreambooth","sections":[],"depth":3},{"title":"IF Stage I Full Dreambooth","local":"if-stage-i-full-dreambooth","sections":[],"depth":3},{"title":"IF Stage II Full Dreambooth","local":"if-stage-ii-full-dreambooth","sections":[],"depth":3}],"depth":2},{"title":"Stable Diffusion XL","local":"stable-diffusion-xl","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="dreambooth" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dreambooth"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>DreamBooth</span></h1> <p data-svelte-h="svelte-1e4e3cx"><a href="https://arxiv.org/abs/2208.12242" rel="nofollow">DreamBooth</a> is a method to personalize text-to-image models like Stable Diffusion given just a few (3-5) images of a subject. It allows the model to generate contextualized images of the subject in different scenes, poses, and views.</p> <p data-svelte-h="svelte-ytrbfy"><img src="https://dreambooth.github.io/DreamBooth_files/teaser_static.jpg" alt="Dreambooth examples from the project's blog"></p> <small data-svelte-h="svelte-wmsu58">Dreambooth examples from the <a href="https://dreambooth.github.io">project's blog.</a></small> <p data-svelte-h="svelte-1766onr">This guide will show you how to finetune DreamBooth with the <a href="https://huggingface.co/CompVis/stable-diffusion-v1-4" rel="nofollow"><code>CompVis/stable-diffusion-v1-4</code></a> model for various GPU sizes, and with Flax. All the training scripts for DreamBooth used in this guide can be found <a href="https://github.com/huggingface/diffusers/tree/main/examples/dreambooth" rel="nofollow">here</a> if you’re interested in digging deeper and seeing how things work.</p> <p data-svelte-h="svelte-1hqez46">Before running the scripts, make sure you install the library’s training dependencies. We also recommend installing 🧨 Diffusers from the <code>main</code> GitHub branch:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install git+https://github.com/huggingface/diffusers | |
| pip install -U -r diffusers/examples/dreambooth/requirements.txt<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1udyijm">xFormers is not part of the training requirements, but we recommend you <a href="../optimization/xformers">install</a> it if you can because it could make your training faster and less memory intensive.</p> <p data-svelte-h="svelte-ygb1fc">After all the dependencies have been set up, initialize a <a href="https://github.com/huggingface/accelerate/" rel="nofollow">🤗 Accelerate</a> environment with:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate config<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-di6juu">To setup a default 🤗 Accelerate environment without choosing any configurations:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate config default<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-130n95n">Or if your environment doesn’t support an interactive shell like a notebook, you can use:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> accelerate.utils <span class="hljs-keyword">import</span> write_basic_config | |
| write_basic_config()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1kk0dkm">Finally, download a <a href="https://huggingface.co/datasets/diffusers/dog-example" rel="nofollow">few images of a dog</a> to DreamBooth with:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> snapshot_download | |
| local_dir = <span class="hljs-string">"./dog"</span> | |
| snapshot_download( | |
| <span class="hljs-string">"diffusers/dog-example"</span>, | |
| local_dir=local_dir, | |
| repo_type=<span class="hljs-string">"dataset"</span>, | |
| ignore_patterns=<span class="hljs-string">".gitattributes"</span>, | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19a7h78">To use your own dataset, take a look at the <a href="create_dataset">Create a dataset for training</a> guide.</p> <h2 class="relative group"><a id="finetuning" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#finetuning"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Finetuning</span></h2> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-1ije7yy">DreamBooth finetuning is very sensitive to hyperparameters and easy to overfit. We recommend you take a look at our <a href="https://huggingface.co/blog/dreambooth" rel="nofollow">in-depth analysis</a> with recommended settings for different subjects to help you choose the appropriate hyperparameters.</p></div> <div class="space-y-10 py-6 2xl:py-8 2xl:-mx-4"> <div class="border border-gray-200 rounded-xl px-4 relative"><div class="flex h-[22px] mt-[-12.5px] justify-between leading-none"><div class="flex px-1 items-center space-x-1 bg-white dark:bg-gray-950"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><defs><clipPath id="a"><rect x="3.05" y="0.5" width="25.73" height="31" fill="none"></rect></clipPath></defs><g clip-path="url(#a)"><path d="M24.94,9.51a12.81,12.81,0,0,1,0,18.16,12.68,12.68,0,0,1-18,0,12.81,12.81,0,0,1,0-18.16l9-9V5l-.84.83-6,6a9.58,9.58,0,1,0,13.55,0ZM20.44,9a1.68,1.68,0,1,1,1.67-1.67A1.68,1.68,0,0,1,20.44,9Z" fill="#ee4c2c"></path></g></svg> <span>Pytorch</span></div> <div class="cursor-pointer flex items-center justify-center space-x-1 text-sm px-2 bg-white dark:bg-gray-950 hover:underline leading-none"><svg class="" width="0.9em" height="0.9em" viewBox="0 0 10 9" fill="currentColor" xmlns="http://www.w3.org/2000/svg"><path d="M1.39125 1.9725L0.0883333 0.669997L0.677917 0.0804138L8.9275 8.33041L8.33792 8.91958L6.95875 7.54041C6.22592 8.00523 5.37572 8.25138 4.50792 8.25C2.26125 8.25 0.392083 6.63333 0 4.5C0.179179 3.52946 0.667345 2.64287 1.39167 1.9725H1.39125ZM5.65667 6.23833L5.04667 5.62833C4.81335 5.73996 4.55116 5.77647 4.29622 5.73282C4.04129 5.68918 3.80617 5.56752 3.62328 5.38463C3.44039 5.20175 3.31874 4.96663 3.27509 4.71169C3.23144 4.45676 3.26795 4.19456 3.37958 3.96125L2.76958 3.35125C2.50447 3.75187 2.38595 4.2318 2.4341 4.70978C2.48225 5.18777 2.6941 5.63442 3.0338 5.97411C3.37349 6.31381 3.82015 6.52567 4.29813 6.57382C4.77611 6.62197 5.25605 6.50345 5.65667 6.23833ZM2.83042 1.06666C3.35 0.862497 3.91625 0.749997 4.50792 0.749997C6.75458 0.749997 8.62375 2.36666 9.01583 4.5C8.88816 5.19404 8.60119 5.84899 8.1775 6.41333L6.56917 4.805C6.61694 4.48317 6.58868 4.15463 6.48664 3.84569C6.3846 3.53675 6.21162 3.256 5.98156 3.02594C5.7515 2.79588 5.47075 2.6229 5.16181 2.52086C4.85287 2.41882 4.52433 2.39056 4.2025 2.43833L2.83042 1.06708V1.06666Z" fill="currentColor"></path></svg> <span>Hide Pytorch content</span></div></div> <div class="framework-content"> <p data-svelte-h="svelte-87vqu3">Set the <code>INSTANCE_DIR</code> environment variable to the path of the directory containing the dog images.</p> <p data-svelte-h="svelte-3hq69k">Specify the <code>MODEL_NAME</code> environment variable (either a Hub model repository id or a path to the directory containing the model weights) and pass it to the <code>pretrained_model_name_or_path</code> argument. The <code>instance_prompt</code> argument is a text prompt that contains a unique identifier, such as <code>sks</code>, and the class the image belongs to, which in this example is <code>a photo of a sks dog</code>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"./dog"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path_to_saved_model"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-gl6rb5">Then you can launch the training script (you can find the full training script <a href="https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth.py" rel="nofollow">here</a>) with the following command:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch train_dreambooth.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \ | |
| --resolution=512 \ | |
| --train_batch_size=1 \ | |
| --gradient_accumulation_steps=1 \ | |
| --learning_rate=5e-6 \ | |
| --lr_scheduler=<span class="hljs-string">"constant"</span> \ | |
| --lr_warmup_steps=0 \ | |
| --max_train_steps=400 \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div></div></div> <div class="border border-gray-200 rounded-xl px-4 relative"><div class="flex h-[22px] mt-[-12.5px] justify-between leading-none"><div class="flex px-1 items-center space-x-1 bg-white dark:bg-gray-950"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1.73em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 451 260.81"><style>.J { | |
| stroke: #dce0df; | |
| } | |
| .K { | |
| stroke-linejoin: round; | |
| } | |
| </style><g fill="#5e97f6" class="J K"><path d="M50.5 130.4l-25 43.31h50l25-43.31h-50z"></path><path d="M.5 217.01l25-43.3h50l-25 43.3H.5z"></path><path d="M125.5 173.71h-50l-25 43.3h50l25-43.3z"></path><path d="M175.5 173.71h-50l-25 43.3h50l25-43.3z"></path><path d="M150.5 130.4l-25 43.31h50l25-43.31h-50z"></path><path d="M175.5 87.1l-25 43.3h50l25-43.3h-50z"></path><path d="M200.5 43.8l-25 43.3h50l25-43.3h-50z"></path><path d="M225.5.5l-25 43.3h50l25-43.3h-50z"></path></g><g fill="#2a56c6" class="J K"><path d="M.5 217.01l25 43.3h50l-25-43.3H.5z"></path><path d="M125.5 260.31h-50l-25-43.3h50l25 43.3z"></path><path d="M175.5 260.31h-50l-25-43.3h50l25 43.3z"></path></g><g fill="#00796b" class="J K"><path d="M200.5 217.01l-25-43.3-25 43.3 25 43.3 25-43.3zm50-86.61l-25-43.3-25 43.3h50z"></path><path d="M250.5 43.8l-25 43.3 25 43.3 25-43.3-25-43.3z"></path></g><path d="M125.5 173.71l-25-43.31-25 43.31h50z" fill="#3367d6" class="J K"></path><g fill="#26a69a" class="J K"><path d="M250.5 130.4h-50l-25 43.31h50l25-43.31z"></path><path d="M300.5 130.4h-50l-25 43.31h50l25-43.31z"></path></g><g fill="#9c27b0" class="J K"><path d="M350.5 43.8L325.5.5l-25 43.3 25 43.3 25-43.3z"></path><path d="M375.5 87.1l-25-43.3-25 43.3 25 43.3 25-43.3z"></path><path d="M400.5 130.4l-25-43.3-25 43.3 25 43.31 25-43.31z"></path><path d="M425.5 173.71l-25-43.31-25 43.31 25 43.3 25-43.3z"></path><path d="M450.5 217.01l-25-43.3-25 43.3 25 43.3 25-43.3zM425.5.5l-25 43.3 25 43.3 25-43.3-25-43.3z"></path><path d="M375.5 87.1l25-43.3 25 43.3-25 43.3-25-43.3zm-25 43.3l-25 43.31 25 43.3 25-43.3-25-43.31z"></path><path d="M325.5 260.31l-25-43.3 25-43.3 25 43.3-25 43.3z"></path></g><path d="M275.5 260.31l-25-43.3h50l25 43.3h-50z" fill="#6a1b9a" class="J K"></path><g fill="#00695c" class="J K"><path d="M225.5 173.71h-50l25 43.3h50l-25-43.3z"></path><path d="M275.5 173.71h-50l25 43.3 25-43.3zm0-86.61l25 43.3h50l-25-43.3h-50z"></path><path d="M300.5 43.8h-50l25 43.3h50l-25-43.3zm125 216.51l-25-43.3h-50l25 43.3h50z"></path><path d="M375.5 173.71l-25 43.3h50l-25-43.3z"></path></g><g fill="#ea80fc" class="J K"><path d="M325.5.5h-50l-25 43.3h50l25-43.3zm0 173.21h-50l-25 43.3h50l25-43.3z"></path><path d="M350.5 130.4h-50l-25 43.31h50l25-43.31zM425.5.5h-50l-25 43.3h50l25-43.3z"></path><path d="M375.5 87.1l-25-43.3h50l-25 43.3z"></path></g></svg> <span>JAX</span></div> <div class="cursor-pointer flex items-center justify-center space-x-1 text-sm px-2 bg-white dark:bg-gray-950 hover:underline leading-none"><svg class="" width="0.9em" height="0.9em" viewBox="0 0 10 9" fill="currentColor" xmlns="http://www.w3.org/2000/svg"><path d="M1.39125 1.9725L0.0883333 0.669997L0.677917 0.0804138L8.9275 8.33041L8.33792 8.91958L6.95875 7.54041C6.22592 8.00523 5.37572 8.25138 4.50792 8.25C2.26125 8.25 0.392083 6.63333 0 4.5C0.179179 3.52946 0.667345 2.64287 1.39167 1.9725H1.39125ZM5.65667 6.23833L5.04667 5.62833C4.81335 5.73996 4.55116 5.77647 4.29622 5.73282C4.04129 5.68918 3.80617 5.56752 3.62328 5.38463C3.44039 5.20175 3.31874 4.96663 3.27509 4.71169C3.23144 4.45676 3.26795 4.19456 3.37958 3.96125L2.76958 3.35125C2.50447 3.75187 2.38595 4.2318 2.4341 4.70978C2.48225 5.18777 2.6941 5.63442 3.0338 5.97411C3.37349 6.31381 3.82015 6.52567 4.29813 6.57382C4.77611 6.62197 5.25605 6.50345 5.65667 6.23833ZM2.83042 1.06666C3.35 0.862497 3.91625 0.749997 4.50792 0.749997C6.75458 0.749997 8.62375 2.36666 9.01583 4.5C8.88816 5.19404 8.60119 5.84899 8.1775 6.41333L6.56917 4.805C6.61694 4.48317 6.58868 4.15463 6.48664 3.84569C6.3846 3.53675 6.21162 3.256 5.98156 3.02594C5.7515 2.79588 5.47075 2.6229 5.16181 2.52086C4.85287 2.41882 4.52433 2.39056 4.2025 2.43833L2.83042 1.06708V1.06666Z" fill="currentColor"></path></svg> <span>Hide JAX content</span></div></div> <div class="framework-content"> <p data-svelte-h="svelte-5xo10d">If you have access to TPUs or want to train even faster, you can try out the <a href="https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth_flax.py" rel="nofollow">Flax training script</a>. The Flax training script doesn’t support gradient checkpointing or gradient accumulation, so you’ll need a GPU with at least 30GB of memory.</p> <p data-svelte-h="svelte-4s2veb">Before running the script, make sure you have the requirements installed:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install -U -r requirements.txt<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-3hq69k">Specify the <code>MODEL_NAME</code> environment variable (either a Hub model repository id or a path to the directory containing the model weights) and pass it to the <code>pretrained_model_name_or_path</code> argument. The <code>instance_prompt</code> argument is a text prompt that contains a unique identifier, such as <code>sks</code>, and the class the image belongs to, which in this example is <code>a photo of a sks dog</code>.</p> <p data-svelte-h="svelte-fw1ia1">Now you can launch the training script with the following command:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"duongna/stable-diffusion-v1-4-flax"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"./dog"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path-to-save-model"</span> | |
| python train_dreambooth_flax.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \ | |
| --resolution=512 \ | |
| --train_batch_size=1 \ | |
| --learning_rate=5e-6 \ | |
| --max_train_steps=400 \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> </div></div></div> <h2 class="relative group"><a id="finetuning-with-prior-preserving-loss" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#finetuning-with-prior-preserving-loss"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Finetuning with prior-preserving loss</span></h2> <p data-svelte-h="svelte-1vu0p8v">Prior preservation is used to avoid overfitting and language-drift (check out the <a href="https://arxiv.org/abs/2208.12242" rel="nofollow">paper</a> to learn more if you’re interested). For prior preservation, you use other images of the same class as part of the training process. The nice thing is that you can generate those images using the Stable Diffusion model itself! The training script will save the generated images to a local path you specify.</p> <p data-svelte-h="svelte-vlraqg">The authors recommend generating <code>num_epochs * num_samples</code> images for prior preservation. In most cases, 200-300 images work well.</p> <div class="space-y-10 py-6 2xl:py-8 2xl:-mx-4"> <div class="border border-gray-200 rounded-xl px-4 relative"><div class="flex h-[22px] mt-[-12.5px] justify-between leading-none"><div class="flex px-1 items-center space-x-1 bg-white dark:bg-gray-950"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><defs><clipPath id="a"><rect x="3.05" y="0.5" width="25.73" height="31" fill="none"></rect></clipPath></defs><g clip-path="url(#a)"><path d="M24.94,9.51a12.81,12.81,0,0,1,0,18.16,12.68,12.68,0,0,1-18,0,12.81,12.81,0,0,1,0-18.16l9-9V5l-.84.83-6,6a9.58,9.58,0,1,0,13.55,0ZM20.44,9a1.68,1.68,0,1,1,1.67-1.67A1.68,1.68,0,0,1,20.44,9Z" fill="#ee4c2c"></path></g></svg> <span>Pytorch</span></div> <div class="cursor-pointer flex items-center justify-center space-x-1 text-sm px-2 bg-white dark:bg-gray-950 hover:underline leading-none"><svg class="" width="0.9em" height="0.9em" viewBox="0 0 10 9" fill="currentColor" xmlns="http://www.w3.org/2000/svg"><path d="M1.39125 1.9725L0.0883333 0.669997L0.677917 0.0804138L8.9275 8.33041L8.33792 8.91958L6.95875 7.54041C6.22592 8.00523 5.37572 8.25138 4.50792 8.25C2.26125 8.25 0.392083 6.63333 0 4.5C0.179179 3.52946 0.667345 2.64287 1.39167 1.9725H1.39125ZM5.65667 6.23833L5.04667 5.62833C4.81335 5.73996 4.55116 5.77647 4.29622 5.73282C4.04129 5.68918 3.80617 5.56752 3.62328 5.38463C3.44039 5.20175 3.31874 4.96663 3.27509 4.71169C3.23144 4.45676 3.26795 4.19456 3.37958 3.96125L2.76958 3.35125C2.50447 3.75187 2.38595 4.2318 2.4341 4.70978C2.48225 5.18777 2.6941 5.63442 3.0338 5.97411C3.37349 6.31381 3.82015 6.52567 4.29813 6.57382C4.77611 6.62197 5.25605 6.50345 5.65667 6.23833ZM2.83042 1.06666C3.35 0.862497 3.91625 0.749997 4.50792 0.749997C6.75458 0.749997 8.62375 2.36666 9.01583 4.5C8.88816 5.19404 8.60119 5.84899 8.1775 6.41333L6.56917 4.805C6.61694 4.48317 6.58868 4.15463 6.48664 3.84569C6.3846 3.53675 6.21162 3.256 5.98156 3.02594C5.7515 2.79588 5.47075 2.6229 5.16181 2.52086C4.85287 2.41882 4.52433 2.39056 4.2025 2.43833L2.83042 1.06708V1.06666Z" fill="currentColor"></path></svg> <span>Hide Pytorch content</span></div></div> <div class="framework-content"> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"./dog"</span> | |
| <span class="hljs-built_in">export</span> CLASS_DIR=<span class="hljs-string">"path_to_class_images"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path_to_saved_model"</span> | |
| accelerate launch train_dreambooth.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --class_data_dir=<span class="hljs-variable">$CLASS_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --with_prior_preservation --prior_loss_weight=1.0 \ | |
| --instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \ | |
| --class_prompt=<span class="hljs-string">"a photo of dog"</span> \ | |
| --resolution=512 \ | |
| --train_batch_size=1 \ | |
| --gradient_accumulation_steps=1 \ | |
| --learning_rate=5e-6 \ | |
| --lr_scheduler=<span class="hljs-string">"constant"</span> \ | |
| --lr_warmup_steps=0 \ | |
| --num_class_images=200 \ | |
| --max_train_steps=800 \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div></div></div> <div class="border border-gray-200 rounded-xl px-4 relative"><div class="flex h-[22px] mt-[-12.5px] justify-between leading-none"><div class="flex px-1 items-center space-x-1 bg-white dark:bg-gray-950"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1.73em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 451 260.81"><style>.J { | |
| stroke: #dce0df; | |
| } | |
| .K { | |
| stroke-linejoin: round; | |
| } | |
| </style><g fill="#5e97f6" class="J K"><path d="M50.5 130.4l-25 43.31h50l25-43.31h-50z"></path><path d="M.5 217.01l25-43.3h50l-25 43.3H.5z"></path><path d="M125.5 173.71h-50l-25 43.3h50l25-43.3z"></path><path d="M175.5 173.71h-50l-25 43.3h50l25-43.3z"></path><path d="M150.5 130.4l-25 43.31h50l25-43.31h-50z"></path><path d="M175.5 87.1l-25 43.3h50l25-43.3h-50z"></path><path d="M200.5 43.8l-25 43.3h50l25-43.3h-50z"></path><path d="M225.5.5l-25 43.3h50l25-43.3h-50z"></path></g><g fill="#2a56c6" class="J K"><path d="M.5 217.01l25 43.3h50l-25-43.3H.5z"></path><path d="M125.5 260.31h-50l-25-43.3h50l25 43.3z"></path><path d="M175.5 260.31h-50l-25-43.3h50l25 43.3z"></path></g><g fill="#00796b" class="J K"><path d="M200.5 217.01l-25-43.3-25 43.3 25 43.3 25-43.3zm50-86.61l-25-43.3-25 43.3h50z"></path><path d="M250.5 43.8l-25 43.3 25 43.3 25-43.3-25-43.3z"></path></g><path d="M125.5 173.71l-25-43.31-25 43.31h50z" fill="#3367d6" class="J K"></path><g fill="#26a69a" class="J K"><path d="M250.5 130.4h-50l-25 43.31h50l25-43.31z"></path><path d="M300.5 130.4h-50l-25 43.31h50l25-43.31z"></path></g><g fill="#9c27b0" class="J K"><path d="M350.5 43.8L325.5.5l-25 43.3 25 43.3 25-43.3z"></path><path d="M375.5 87.1l-25-43.3-25 43.3 25 43.3 25-43.3z"></path><path d="M400.5 130.4l-25-43.3-25 43.3 25 43.31 25-43.31z"></path><path d="M425.5 173.71l-25-43.31-25 43.31 25 43.3 25-43.3z"></path><path d="M450.5 217.01l-25-43.3-25 43.3 25 43.3 25-43.3zM425.5.5l-25 43.3 25 43.3 25-43.3-25-43.3z"></path><path d="M375.5 87.1l25-43.3 25 43.3-25 43.3-25-43.3zm-25 43.3l-25 43.31 25 43.3 25-43.3-25-43.31z"></path><path d="M325.5 260.31l-25-43.3 25-43.3 25 43.3-25 43.3z"></path></g><path d="M275.5 260.31l-25-43.3h50l25 43.3h-50z" fill="#6a1b9a" class="J K"></path><g fill="#00695c" class="J K"><path d="M225.5 173.71h-50l25 43.3h50l-25-43.3z"></path><path d="M275.5 173.71h-50l25 43.3 25-43.3zm0-86.61l25 43.3h50l-25-43.3h-50z"></path><path d="M300.5 43.8h-50l25 43.3h50l-25-43.3zm125 216.51l-25-43.3h-50l25 43.3h50z"></path><path d="M375.5 173.71l-25 43.3h50l-25-43.3z"></path></g><g fill="#ea80fc" class="J K"><path d="M325.5.5h-50l-25 43.3h50l25-43.3zm0 173.21h-50l-25 43.3h50l25-43.3z"></path><path d="M350.5 130.4h-50l-25 43.31h50l25-43.31zM425.5.5h-50l-25 43.3h50l25-43.3z"></path><path d="M375.5 87.1l-25-43.3h50l-25 43.3z"></path></g></svg> <span>JAX</span></div> <div class="cursor-pointer flex items-center justify-center space-x-1 text-sm px-2 bg-white dark:bg-gray-950 hover:underline leading-none"><svg class="" width="0.9em" height="0.9em" viewBox="0 0 10 9" fill="currentColor" xmlns="http://www.w3.org/2000/svg"><path d="M1.39125 1.9725L0.0883333 0.669997L0.677917 0.0804138L8.9275 8.33041L8.33792 8.91958L6.95875 7.54041C6.22592 8.00523 5.37572 8.25138 4.50792 8.25C2.26125 8.25 0.392083 6.63333 0 4.5C0.179179 3.52946 0.667345 2.64287 1.39167 1.9725H1.39125ZM5.65667 6.23833L5.04667 5.62833C4.81335 5.73996 4.55116 5.77647 4.29622 5.73282C4.04129 5.68918 3.80617 5.56752 3.62328 5.38463C3.44039 5.20175 3.31874 4.96663 3.27509 4.71169C3.23144 4.45676 3.26795 4.19456 3.37958 3.96125L2.76958 3.35125C2.50447 3.75187 2.38595 4.2318 2.4341 4.70978C2.48225 5.18777 2.6941 5.63442 3.0338 5.97411C3.37349 6.31381 3.82015 6.52567 4.29813 6.57382C4.77611 6.62197 5.25605 6.50345 5.65667 6.23833ZM2.83042 1.06666C3.35 0.862497 3.91625 0.749997 4.50792 0.749997C6.75458 0.749997 8.62375 2.36666 9.01583 4.5C8.88816 5.19404 8.60119 5.84899 8.1775 6.41333L6.56917 4.805C6.61694 4.48317 6.58868 4.15463 6.48664 3.84569C6.3846 3.53675 6.21162 3.256 5.98156 3.02594C5.7515 2.79588 5.47075 2.6229 5.16181 2.52086C4.85287 2.41882 4.52433 2.39056 4.2025 2.43833L2.83042 1.06708V1.06666Z" fill="currentColor"></path></svg> <span>Hide JAX content</span></div></div> <div class="framework-content"> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"duongna/stable-diffusion-v1-4-flax"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"./dog"</span> | |
| <span class="hljs-built_in">export</span> CLASS_DIR=<span class="hljs-string">"path-to-class-images"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path-to-save-model"</span> | |
| python train_dreambooth_flax.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --class_data_dir=<span class="hljs-variable">$CLASS_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --with_prior_preservation --prior_loss_weight=1.0 \ | |
| --instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \ | |
| --class_prompt=<span class="hljs-string">"a photo of dog"</span> \ | |
| --resolution=512 \ | |
| --train_batch_size=1 \ | |
| --learning_rate=5e-6 \ | |
| --num_class_images=200 \ | |
| --max_train_steps=800 \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> </div></div></div> <h2 class="relative group"><a id="finetuning-the-text-encoder-and-unet" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#finetuning-the-text-encoder-and-unet"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Finetuning the text encoder and UNet</span></h2> <p data-svelte-h="svelte-grybit">The script also allows you to finetune the <code>text_encoder</code> along with the <code>unet</code>. In our experiments (check out the <a href="https://huggingface.co/blog/dreambooth" rel="nofollow">Training Stable Diffusion with DreamBooth using 🧨 Diffusers</a> post for more details), this yields much better results, especially when generating images of faces.</p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-1uh02te">Training the text encoder requires additional memory and it won’t fit on a 16GB GPU. You’ll need at least 24GB VRAM to use this option.</p></div> <p data-svelte-h="svelte-1k2uzj9">Pass the <code>--train_text_encoder</code> argument to the training script to enable finetuning the <code>text_encoder</code> and <code>unet</code>:</p> <div class="space-y-10 py-6 2xl:py-8 2xl:-mx-4"> <div class="border border-gray-200 rounded-xl px-4 relative"><div class="flex h-[22px] mt-[-12.5px] justify-between leading-none"><div class="flex px-1 items-center space-x-1 bg-white dark:bg-gray-950"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><defs><clipPath id="a"><rect x="3.05" y="0.5" width="25.73" height="31" fill="none"></rect></clipPath></defs><g clip-path="url(#a)"><path d="M24.94,9.51a12.81,12.81,0,0,1,0,18.16,12.68,12.68,0,0,1-18,0,12.81,12.81,0,0,1,0-18.16l9-9V5l-.84.83-6,6a9.58,9.58,0,1,0,13.55,0ZM20.44,9a1.68,1.68,0,1,1,1.67-1.67A1.68,1.68,0,0,1,20.44,9Z" fill="#ee4c2c"></path></g></svg> <span>Pytorch</span></div> <div class="cursor-pointer flex items-center justify-center space-x-1 text-sm px-2 bg-white dark:bg-gray-950 hover:underline leading-none"><svg class="" width="0.9em" height="0.9em" viewBox="0 0 10 9" fill="currentColor" xmlns="http://www.w3.org/2000/svg"><path d="M1.39125 1.9725L0.0883333 0.669997L0.677917 0.0804138L8.9275 8.33041L8.33792 8.91958L6.95875 7.54041C6.22592 8.00523 5.37572 8.25138 4.50792 8.25C2.26125 8.25 0.392083 6.63333 0 4.5C0.179179 3.52946 0.667345 2.64287 1.39167 1.9725H1.39125ZM5.65667 6.23833L5.04667 5.62833C4.81335 5.73996 4.55116 5.77647 4.29622 5.73282C4.04129 5.68918 3.80617 5.56752 3.62328 5.38463C3.44039 5.20175 3.31874 4.96663 3.27509 4.71169C3.23144 4.45676 3.26795 4.19456 3.37958 3.96125L2.76958 3.35125C2.50447 3.75187 2.38595 4.2318 2.4341 4.70978C2.48225 5.18777 2.6941 5.63442 3.0338 5.97411C3.37349 6.31381 3.82015 6.52567 4.29813 6.57382C4.77611 6.62197 5.25605 6.50345 5.65667 6.23833ZM2.83042 1.06666C3.35 0.862497 3.91625 0.749997 4.50792 0.749997C6.75458 0.749997 8.62375 2.36666 9.01583 4.5C8.88816 5.19404 8.60119 5.84899 8.1775 6.41333L6.56917 4.805C6.61694 4.48317 6.58868 4.15463 6.48664 3.84569C6.3846 3.53675 6.21162 3.256 5.98156 3.02594C5.7515 2.79588 5.47075 2.6229 5.16181 2.52086C4.85287 2.41882 4.52433 2.39056 4.2025 2.43833L2.83042 1.06708V1.06666Z" fill="currentColor"></path></svg> <span>Hide Pytorch content</span></div></div> <div class="framework-content"> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"./dog"</span> | |
| <span class="hljs-built_in">export</span> CLASS_DIR=<span class="hljs-string">"path_to_class_images"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path_to_saved_model"</span> | |
| accelerate launch train_dreambooth.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --train_text_encoder \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --class_data_dir=<span class="hljs-variable">$CLASS_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --with_prior_preservation --prior_loss_weight=1.0 \ | |
| --instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \ | |
| --class_prompt=<span class="hljs-string">"a photo of dog"</span> \ | |
| --resolution=512 \ | |
| --train_batch_size=1 \ | |
| --use_8bit_adam \ | |
| --gradient_checkpointing \ | |
| --learning_rate=2e-6 \ | |
| --lr_scheduler=<span class="hljs-string">"constant"</span> \ | |
| --lr_warmup_steps=0 \ | |
| --num_class_images=200 \ | |
| --max_train_steps=800 \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div></div></div> <div class="border border-gray-200 rounded-xl px-4 relative"><div class="flex h-[22px] mt-[-12.5px] justify-between leading-none"><div class="flex px-1 items-center space-x-1 bg-white dark:bg-gray-950"><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1.73em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 451 260.81"><style>.J { | |
| stroke: #dce0df; | |
| } | |
| .K { | |
| stroke-linejoin: round; | |
| } | |
| </style><g fill="#5e97f6" class="J K"><path d="M50.5 130.4l-25 43.31h50l25-43.31h-50z"></path><path d="M.5 217.01l25-43.3h50l-25 43.3H.5z"></path><path d="M125.5 173.71h-50l-25 43.3h50l25-43.3z"></path><path d="M175.5 173.71h-50l-25 43.3h50l25-43.3z"></path><path d="M150.5 130.4l-25 43.31h50l25-43.31h-50z"></path><path d="M175.5 87.1l-25 43.3h50l25-43.3h-50z"></path><path d="M200.5 43.8l-25 43.3h50l25-43.3h-50z"></path><path d="M225.5.5l-25 43.3h50l25-43.3h-50z"></path></g><g fill="#2a56c6" class="J K"><path d="M.5 217.01l25 43.3h50l-25-43.3H.5z"></path><path d="M125.5 260.31h-50l-25-43.3h50l25 43.3z"></path><path d="M175.5 260.31h-50l-25-43.3h50l25 43.3z"></path></g><g fill="#00796b" class="J K"><path d="M200.5 217.01l-25-43.3-25 43.3 25 43.3 25-43.3zm50-86.61l-25-43.3-25 43.3h50z"></path><path d="M250.5 43.8l-25 43.3 25 43.3 25-43.3-25-43.3z"></path></g><path d="M125.5 173.71l-25-43.31-25 43.31h50z" fill="#3367d6" class="J K"></path><g fill="#26a69a" class="J K"><path d="M250.5 130.4h-50l-25 43.31h50l25-43.31z"></path><path d="M300.5 130.4h-50l-25 43.31h50l25-43.31z"></path></g><g fill="#9c27b0" class="J K"><path d="M350.5 43.8L325.5.5l-25 43.3 25 43.3 25-43.3z"></path><path d="M375.5 87.1l-25-43.3-25 43.3 25 43.3 25-43.3z"></path><path d="M400.5 130.4l-25-43.3-25 43.3 25 43.31 25-43.31z"></path><path d="M425.5 173.71l-25-43.31-25 43.31 25 43.3 25-43.3z"></path><path d="M450.5 217.01l-25-43.3-25 43.3 25 43.3 25-43.3zM425.5.5l-25 43.3 25 43.3 25-43.3-25-43.3z"></path><path d="M375.5 87.1l25-43.3 25 43.3-25 43.3-25-43.3zm-25 43.3l-25 43.31 25 43.3 25-43.3-25-43.31z"></path><path d="M325.5 260.31l-25-43.3 25-43.3 25 43.3-25 43.3z"></path></g><path d="M275.5 260.31l-25-43.3h50l25 43.3h-50z" fill="#6a1b9a" class="J K"></path><g fill="#00695c" class="J K"><path d="M225.5 173.71h-50l25 43.3h50l-25-43.3z"></path><path d="M275.5 173.71h-50l25 43.3 25-43.3zm0-86.61l25 43.3h50l-25-43.3h-50z"></path><path d="M300.5 43.8h-50l25 43.3h50l-25-43.3zm125 216.51l-25-43.3h-50l25 43.3h50z"></path><path d="M375.5 173.71l-25 43.3h50l-25-43.3z"></path></g><g fill="#ea80fc" class="J K"><path d="M325.5.5h-50l-25 43.3h50l25-43.3zm0 173.21h-50l-25 43.3h50l25-43.3z"></path><path d="M350.5 130.4h-50l-25 43.31h50l25-43.31zM425.5.5h-50l-25 43.3h50l25-43.3z"></path><path d="M375.5 87.1l-25-43.3h50l-25 43.3z"></path></g></svg> <span>JAX</span></div> <div class="cursor-pointer flex items-center justify-center space-x-1 text-sm px-2 bg-white dark:bg-gray-950 hover:underline leading-none"><svg class="" width="0.9em" height="0.9em" viewBox="0 0 10 9" fill="currentColor" xmlns="http://www.w3.org/2000/svg"><path d="M1.39125 1.9725L0.0883333 0.669997L0.677917 0.0804138L8.9275 8.33041L8.33792 8.91958L6.95875 7.54041C6.22592 8.00523 5.37572 8.25138 4.50792 8.25C2.26125 8.25 0.392083 6.63333 0 4.5C0.179179 3.52946 0.667345 2.64287 1.39167 1.9725H1.39125ZM5.65667 6.23833L5.04667 5.62833C4.81335 5.73996 4.55116 5.77647 4.29622 5.73282C4.04129 5.68918 3.80617 5.56752 3.62328 5.38463C3.44039 5.20175 3.31874 4.96663 3.27509 4.71169C3.23144 4.45676 3.26795 4.19456 3.37958 3.96125L2.76958 3.35125C2.50447 3.75187 2.38595 4.2318 2.4341 4.70978C2.48225 5.18777 2.6941 5.63442 3.0338 5.97411C3.37349 6.31381 3.82015 6.52567 4.29813 6.57382C4.77611 6.62197 5.25605 6.50345 5.65667 6.23833ZM2.83042 1.06666C3.35 0.862497 3.91625 0.749997 4.50792 0.749997C6.75458 0.749997 8.62375 2.36666 9.01583 4.5C8.88816 5.19404 8.60119 5.84899 8.1775 6.41333L6.56917 4.805C6.61694 4.48317 6.58868 4.15463 6.48664 3.84569C6.3846 3.53675 6.21162 3.256 5.98156 3.02594C5.7515 2.79588 5.47075 2.6229 5.16181 2.52086C4.85287 2.41882 4.52433 2.39056 4.2025 2.43833L2.83042 1.06708V1.06666Z" fill="currentColor"></path></svg> <span>Hide JAX content</span></div></div> <div class="framework-content"> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"duongna/stable-diffusion-v1-4-flax"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"./dog"</span> | |
| <span class="hljs-built_in">export</span> CLASS_DIR=<span class="hljs-string">"path-to-class-images"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path-to-save-model"</span> | |
| python train_dreambooth_flax.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --train_text_encoder \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --class_data_dir=<span class="hljs-variable">$CLASS_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --with_prior_preservation --prior_loss_weight=1.0 \ | |
| --instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \ | |
| --class_prompt=<span class="hljs-string">"a photo of dog"</span> \ | |
| --resolution=512 \ | |
| --train_batch_size=1 \ | |
| --learning_rate=2e-6 \ | |
| --num_class_images=200 \ | |
| --max_train_steps=800 \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> </div></div></div> <h2 class="relative group"><a id="finetuning-with-lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#finetuning-with-lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Finetuning with LoRA</span></h2> <p data-svelte-h="svelte-1au4x04">You can also use Low-Rank Adaptation of Large Language Models (LoRA), a fine-tuning technique for accelerating training large models, on DreamBooth. For more details, take a look at the <a href="./lora#dreambooth">LoRA training</a> guide.</p> <h2 class="relative group"><a id="saving-checkpoints-while-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#saving-checkpoints-while-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Saving checkpoints while training</span></h2> <p data-svelte-h="svelte-1p0pcfx">It’s easy to overfit while training with Dreambooth, so sometimes it’s useful to save regular checkpoints during the training process. One of the intermediate checkpoints might actually work better than the final model! Pass the following argument to the training script to enable saving checkpoints:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> --checkpointing_steps=500<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-8xsnea">This saves the full training state in subfolders of your <code>output_dir</code>. Subfolder names begin with the prefix <code>checkpoint-</code>, followed by the number of steps performed so far; for example, <code>checkpoint-1500</code> would be a checkpoint saved after 1500 training steps.</p> <h3 class="relative group"><a id="resume-training-from-a-saved-checkpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#resume-training-from-a-saved-checkpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Resume training from a saved checkpoint</span></h3> <p data-svelte-h="svelte-1alj2e8">If you want to resume training from any of the saved checkpoints, you can pass the argument <code>--resume_from_checkpoint</code> to the script and specify the name of the checkpoint you want to use. You can also use the special string <code>"latest"</code> to resume from the last saved checkpoint (the one with the largest number of steps). For example, the following would resume training from the checkpoint saved after 1500 steps:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> --resume_from_checkpoint=<span class="hljs-string">"checkpoint-1500"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1m22ucf">This is a good opportunity to tweak some of your hyperparameters if you wish.</p> <h3 class="relative group"><a id="inference-from-a-saved-checkpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference-from-a-saved-checkpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference from a saved checkpoint</span></h3> <p data-svelte-h="svelte-klvvh8">Saved checkpoints are stored in a format suitable for resuming training. They not only include the model weights, but also the state of the optimizer, data loaders, and learning rate.</p> <p data-svelte-h="svelte-1tyy594">If you have <strong><code>"accelerate>=0.16.0"</code></strong> installed, use the following code to run | |
| inference from an intermediate checkpoint.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline, UNet2DConditionModel | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> CLIPTextModel | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-comment"># Load the pipeline with the same arguments (model, revision) that were used for training</span> | |
| model_id = <span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span> | |
| unet = UNet2DConditionModel.from_pretrained(<span class="hljs-string">"/sddata/dreambooth/daruma-v2-1/checkpoint-100/unet"</span>) | |
| <span class="hljs-comment"># if you have trained with `--args.train_text_encoder` make sure to also load the text encoder</span> | |
| text_encoder = CLIPTextModel.from_pretrained(<span class="hljs-string">"/sddata/dreambooth/daruma-v2-1/checkpoint-100/text_encoder"</span>) | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| model_id, unet=unet, text_encoder=text_encoder, dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span> | |
| ) | |
| pipeline.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># Perform inference, or save, or push to the hub</span> | |
| pipeline.save_pretrained(<span class="hljs-string">"dreambooth-pipeline"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-10s7sps">If you have <strong><code>"accelerate<0.16.0"</code></strong> installed, you need to convert it to an inference pipeline first:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-comment"># Load the pipeline with the same arguments (model, revision) that were used for training</span> | |
| model_id = <span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span> | |
| pipeline = DiffusionPipeline.from_pretrained(model_id, use_safetensors=<span class="hljs-literal">True</span>) | |
| accelerator = Accelerator() | |
| <span class="hljs-comment"># Use text_encoder if `--train_text_encoder` was used for the initial training</span> | |
| unet, text_encoder = accelerator.prepare(pipeline.unet, pipeline.text_encoder) | |
| <span class="hljs-comment"># Restore state from a checkpoint path. You have to use the absolute path here.</span> | |
| accelerator.load_state(<span class="hljs-string">"/sddata/dreambooth/daruma-v2-1/checkpoint-100"</span>) | |
| <span class="hljs-comment"># Rebuild the pipeline with the unwrapped models (assignment to .unet and .text_encoder should work too)</span> | |
| pipeline = DiffusionPipeline.from_pretrained( | |
| model_id, | |
| unet=accelerator.unwrap_model(unet), | |
| text_encoder=accelerator.unwrap_model(text_encoder), | |
| use_safetensors=<span class="hljs-literal">True</span>, | |
| ) | |
| <span class="hljs-comment"># Perform inference, or save, or push to the hub</span> | |
| pipeline.save_pretrained(<span class="hljs-string">"dreambooth-pipeline"</span>)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="optimizations-for-different-gpu-sizes" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#optimizations-for-different-gpu-sizes"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Optimizations for different GPU sizes</span></h2> <p data-svelte-h="svelte-wcr4p4">Depending on your hardware, there are a few different ways to optimize DreamBooth on GPUs from 16GB to just 8GB!</p> <h3 class="relative group"><a id="xformers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#xformers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>xFormers</span></h3> <p data-svelte-h="svelte-1tmlmwd"><a href="https://github.com/facebookresearch/xformers" rel="nofollow">xFormers</a> is a toolbox for optimizing Transformers, and it includes a <a href="https://facebookresearch.github.io/xformers/components/ops.html#module-xformers.ops" rel="nofollow">memory-efficient attention</a> mechanism that is used in 🧨 Diffusers. You’ll need to <a href="./optimization/xformers">install xFormers</a> and then add the following argument to your training script:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> --enable_xformers_memory_efficient_attention<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ze7fj8">xFormers is not available in Flax.</p> <h3 class="relative group"><a id="set-gradients-to-none" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#set-gradients-to-none"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Set gradients to none</span></h3> <p data-svelte-h="svelte-1adkuc5">Another way you can lower your memory footprint is to <a href="https://pytorch.org/docs/stable/generated/torch.optim.Optimizer.zero_grad.html" rel="nofollow">set the gradients</a> to <code>None</code> instead of zero. However, this may change certain behaviors, so if you run into any issues, try removing this argument. Add the following argument to your training script to set the gradients to <code>None</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> --set_grads_to_none<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="16gb-gpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#16gb-gpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>16GB GPU</span></h3> <p data-svelte-h="svelte-e7gg43">With the help of gradient checkpointing and <a href="https://github.com/TimDettmers/bitsandbytes" rel="nofollow">bitsandbytes</a> 8-bit optimizer, it’s possible to train DreamBooth on a 16GB GPU. Make sure you have bitsandbytes installed:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install bitsandbytes<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1od8ya5">Then pass the <code>--use_8bit_adam</code> option to the training script:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"./dog"</span> | |
| <span class="hljs-built_in">export</span> CLASS_DIR=<span class="hljs-string">"path_to_class_images"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path_to_saved_model"</span> | |
| accelerate launch train_dreambooth.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --class_data_dir=<span class="hljs-variable">$CLASS_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --with_prior_preservation --prior_loss_weight=1.0 \ | |
| --instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \ | |
| --class_prompt=<span class="hljs-string">"a photo of dog"</span> \ | |
| --resolution=512 \ | |
| --train_batch_size=1 \ | |
| --gradient_accumulation_steps=2 --gradient_checkpointing \ | |
| --use_8bit_adam \ | |
| --learning_rate=5e-6 \ | |
| --lr_scheduler=<span class="hljs-string">"constant"</span> \ | |
| --lr_warmup_steps=0 \ | |
| --num_class_images=200 \ | |
| --max_train_steps=800 \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="12gb-gpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#12gb-gpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>12GB GPU</span></h3> <p data-svelte-h="svelte-1g7pdkw">To run DreamBooth on a 12GB GPU, you’ll need to enable gradient checkpointing, the 8-bit optimizer, xFormers, and set the gradients to <code>None</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"./dog"</span> | |
| <span class="hljs-built_in">export</span> CLASS_DIR=<span class="hljs-string">"path-to-class-images"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path-to-save-model"</span> | |
| accelerate launch train_dreambooth.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --class_data_dir=<span class="hljs-variable">$CLASS_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --with_prior_preservation --prior_loss_weight=1.0 \ | |
| --instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \ | |
| --class_prompt=<span class="hljs-string">"a photo of dog"</span> \ | |
| --resolution=512 \ | |
| --train_batch_size=1 \ | |
| --gradient_accumulation_steps=1 --gradient_checkpointing \ | |
| --use_8bit_adam \ | |
| --enable_xformers_memory_efficient_attention \ | |
| --set_grads_to_none \ | |
| --learning_rate=2e-6 \ | |
| --lr_scheduler=<span class="hljs-string">"constant"</span> \ | |
| --lr_warmup_steps=0 \ | |
| --num_class_images=200 \ | |
| --max_train_steps=800 \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="8-gb-gpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#8-gb-gpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>8 GB GPU</span></h3> <p data-svelte-h="svelte-4mpnbj">For 8GB GPUs, you’ll need the help of <a href="https://www.deepspeed.ai/" rel="nofollow">DeepSpeed</a> to offload some | |
| tensors from the VRAM to either the CPU or NVME, enabling training with less GPU memory.</p> <p data-svelte-h="svelte-1fx999v">Run the following command to configure your 🤗 Accelerate environment:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate config<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-unul6w">During configuration, confirm that you want to use DeepSpeed. Now it’s possible to train on under 8GB VRAM by combining DeepSpeed stage 2, fp16 mixed precision, and offloading the model parameters and the optimizer state to the CPU. The drawback is that this requires more system RAM, about 25 GB. See <a href="https://huggingface.co/docs/accelerate/usage_guides/deepspeed" rel="nofollow">the DeepSpeed documentation</a> for more configuration options.</p> <p data-svelte-h="svelte-pbdeen">You should also change the default Adam optimizer to DeepSpeed’s optimized version of Adam | |
| <a href="https://deepspeed.readthedocs.io/en/latest/optimizers.html#adam-cpu" rel="nofollow"><code>deepspeed.ops.adam.DeepSpeedCPUAdam</code></a> for a substantial speedup. Enabling <code>DeepSpeedCPUAdam</code> requires your system’s CUDA toolchain version to be the same as the one installed with PyTorch.</p> <p data-svelte-h="svelte-uvirud">8-bit optimizers don’t seem to be compatible with DeepSpeed at the moment.</p> <p data-svelte-h="svelte-erekru">Launch training with the following command:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"./dog"</span> | |
| <span class="hljs-built_in">export</span> CLASS_DIR=<span class="hljs-string">"path_to_class_images"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path_to_saved_model"</span> | |
| accelerate launch train_dreambooth.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --class_data_dir=<span class="hljs-variable">$CLASS_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --with_prior_preservation --prior_loss_weight=1.0 \ | |
| --instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \ | |
| --class_prompt=<span class="hljs-string">"a photo of dog"</span> \ | |
| --resolution=512 \ | |
| --train_batch_size=1 \ | |
| --sample_batch_size=1 \ | |
| --gradient_accumulation_steps=1 --gradient_checkpointing \ | |
| --learning_rate=5e-6 \ | |
| --lr_scheduler=<span class="hljs-string">"constant"</span> \ | |
| --lr_warmup_steps=0 \ | |
| --num_class_images=200 \ | |
| --max_train_steps=800 \ | |
| --mixed_precision=fp16 \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference</span></h2> <p data-svelte-h="svelte-b5z2mg">Once you have trained a model, specify the path to where the model is saved, and use it for inference in the <a href="/docs/diffusers/v0.22.2/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a>. Make sure your prompts include the special <code>identifier</code> used during training (<code>sks</code> in the previous examples).</p> <p data-svelte-h="svelte-14k0zzn">If you have <strong><code>"accelerate>=0.16.0"</code></strong> installed, you can use the following code to run | |
| inference from an intermediate checkpoint:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| model_id = <span class="hljs-string">"path_to_saved_model"</span> | |
| pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>).to(<span class="hljs-string">"cuda"</span>) | |
| prompt = <span class="hljs-string">"A photo of sks dog in a bucket"</span> | |
| image = pipe(prompt, num_inference_steps=<span class="hljs-number">50</span>, guidance_scale=<span class="hljs-number">7.5</span>).images[<span class="hljs-number">0</span>] | |
| image.save(<span class="hljs-string">"dog-bucket.png"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-rw69oc">You may also run inference from any of the <a href="#inference-from-a-saved-checkpoint">saved training checkpoints</a>.</p> <h2 class="relative group"><a id="if" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#if"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>IF</span></h2> <p data-svelte-h="svelte-vx8447">You can use the lora and full dreambooth scripts to train the text to image <a href="https://huggingface.co/DeepFloyd/IF-I-XL-v1.0" rel="nofollow">IF model</a> and the stage II upscaler | |
| <a href="https://huggingface.co/DeepFloyd/IF-II-L-v1.0" rel="nofollow">IF model</a>.</p> <p data-svelte-h="svelte-1gxlb2o">Note that IF has a predicted variance, and our finetuning scripts only train the models predicted error, so for finetuned IF models we switch to a fixed | |
| variance schedule. The full finetuning scripts will update the scheduler config for the full saved model. However, when loading saved LoRA weights, you | |
| must also update the pipeline’s scheduler config.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"DeepFloyd/IF-I-XL-v1.0"</span>, use_safetensors=<span class="hljs-literal">True</span>) | |
| pipe.load_lora_weights(<span class="hljs-string">"<lora weights path>"</span>) | |
| <span class="hljs-comment"># Update scheduler config to fixed variance schedule</span> | |
| pipe.scheduler = pipe.scheduler.__class__.from_config(pipe.scheduler.config, variance_type=<span class="hljs-string">"fixed_small"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-j509hk">Additionally, a few alternative cli flags are needed for IF.</p> <p data-svelte-h="svelte-7zyzyx"><code>--resolution=64</code>: IF is a pixel space diffusion model. In order to operate on un-compressed pixels, the input images are of a much smaller resolution.</p> <p data-svelte-h="svelte-hvq437"><code>--pre_compute_text_embeddings</code>: IF uses <a href="https://huggingface.co/docs/transformers/model_doc/t5" rel="nofollow">T5</a> for its text encoder. In order to save GPU memory, we pre compute all text embeddings and then de-allocate | |
| T5.</p> <p data-svelte-h="svelte-1d1zpcw"><code>--tokenizer_max_length=77</code>: T5 has a longer default text length, but the default IF encoding procedure uses a smaller number.</p> <p data-svelte-h="svelte-y6ibo7"><code>--text_encoder_use_attention_mask</code>: T5 passes the attention mask to the text encoder.</p> <h3 class="relative group"><a id="tips-and-tricks" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#tips-and-tricks"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Tips and Tricks</span></h3> <p data-svelte-h="svelte-14brpob">We find LoRA to be sufficient for finetuning the stage I model as the low resolution of the model makes representing finegrained detail hard regardless.</p> <p data-svelte-h="svelte-vbhic8">For common and/or not-visually complex object concepts, you can get away with not-finetuning the upscaler. Just be sure to adjust the prompt passed to the | |
| upscaler to remove the new token from the instance prompt. I.e. if your stage I prompt is “a sks dog”, use “a dog” for your stage II prompt.</p> <p data-svelte-h="svelte-9xbc5m">For finegrained detail like faces that aren’t present in the original training set, we find that full finetuning of the stage II upscaler is better than | |
| LoRA finetuning stage II.</p> <p data-svelte-h="svelte-bx1cek">For finegrained detail like faces, we find that lower learning rates along with larger batch sizes work best.</p> <p data-svelte-h="svelte-19pvvud">For stage II, we find that lower learning rates are also needed.</p> <p data-svelte-h="svelte-1qkh0q9">We found experimentally that the DDPM scheduler with the default larger number of denoising steps to sometimes work better than the DPM Solver scheduler | |
| used in the training scripts.</p> <h3 class="relative group"><a id="stage-ii-additional-validation-images" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stage-ii-additional-validation-images"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Stage II additional validation images</span></h3> <p data-svelte-h="svelte-jdi7gu">The stage II validation requires images to upscale, we can download a downsized version of the training set:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> snapshot_download | |
| local_dir = <span class="hljs-string">"./dog_downsized"</span> | |
| snapshot_download( | |
| <span class="hljs-string">"diffusers/dog-example-downsized"</span>, | |
| local_dir=local_dir, | |
| repo_type=<span class="hljs-string">"dataset"</span>, | |
| ignore_patterns=<span class="hljs-string">".gitattributes"</span>, | |
| )<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="if-stage-i-lora-dreambooth" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#if-stage-i-lora-dreambooth"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>IF stage I LoRA Dreambooth</span></h3> <p data-svelte-h="svelte-1kpgnux">This training configuration requires ~28 GB VRAM.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"DeepFloyd/IF-I-XL-v1.0"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"dog"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"dreambooth_dog_lora"</span> | |
| accelerate launch train_dreambooth_lora.py \ | |
| --report_to wandb \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --instance_prompt=<span class="hljs-string">"a sks dog"</span> \ | |
| --resolution=64 \ | |
| --train_batch_size=4 \ | |
| --gradient_accumulation_steps=1 \ | |
| --learning_rate=5e-6 \ | |
| --scale_lr \ | |
| --max_train_steps=1200 \ | |
| --validation_prompt=<span class="hljs-string">"a sks dog"</span> \ | |
| --validation_epochs=25 \ | |
| --checkpointing_steps=100 \ | |
| --pre_compute_text_embeddings \ | |
| --tokenizer_max_length=77 \ | |
| --text_encoder_use_attention_mask<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="if-stage-ii-lora-dreambooth" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#if-stage-ii-lora-dreambooth"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>IF stage II LoRA Dreambooth</span></h3> <p data-svelte-h="svelte-cqxip6"><code>--validation_images</code>: These images are upscaled during validation steps.</p> <p data-svelte-h="svelte-a9m9uf"><code>--class_labels_conditioning=timesteps</code>: Pass additional conditioning to the UNet needed for stage II.</p> <p data-svelte-h="svelte-ilyx19"><code>--learning_rate=1e-6</code>: Lower learning rate than stage I.</p> <p data-svelte-h="svelte-rh5mqd"><code>--resolution=256</code>: The upscaler expects higher resolution inputs</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"DeepFloyd/IF-II-L-v1.0"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"dog"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"dreambooth_dog_upscale"</span> | |
| <span class="hljs-built_in">export</span> VALIDATION_IMAGES=<span class="hljs-string">"dog_downsized/image_1.png dog_downsized/image_2.png dog_downsized/image_3.png dog_downsized/image_4.png"</span> | |
| python train_dreambooth_lora.py \ | |
| --report_to wandb \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --instance_prompt=<span class="hljs-string">"a sks dog"</span> \ | |
| --resolution=256 \ | |
| --train_batch_size=4 \ | |
| --gradient_accumulation_steps=1 \ | |
| --learning_rate=1e-6 \ | |
| --max_train_steps=2000 \ | |
| --validation_prompt=<span class="hljs-string">"a sks dog"</span> \ | |
| --validation_epochs=100 \ | |
| --checkpointing_steps=500 \ | |
| --pre_compute_text_embeddings \ | |
| --tokenizer_max_length=77 \ | |
| --text_encoder_use_attention_mask \ | |
| --validation_images <span class="hljs-variable">$VALIDATION_IMAGES</span> \ | |
| --class_labels_conditioning=timesteps<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="if-stage-i-full-dreambooth" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#if-stage-i-full-dreambooth"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>IF Stage I Full Dreambooth</span></h3> <p data-svelte-h="svelte-1jd8uaa"><code>--skip_save_text_encoder</code>: When training the full model, this will skip saving the entire T5 with the finetuned model. You can still load the pipeline | |
| with a T5 loaded from the original model.</p> <p data-svelte-h="svelte-hly0r2"><code>use_8bit_adam</code>: Due to the size of the optimizer states, we recommend training the full XL IF model with 8bit adam.</p> <p data-svelte-h="svelte-b8gry9"><code>--learning_rate=1e-7</code>: For full dreambooth, IF requires very low learning rates. With higher learning rates model quality will degrade. Note that it is | |
| likely the learning rate can be increased with larger batch sizes.</p> <p data-svelte-h="svelte-jrp3vq">Using 8bit adam and a batch size of 4, the model can be trained in ~48 GB VRAM.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"DeepFloyd/IF-I-XL-v1.0"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"dog"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"dreambooth_if"</span> | |
| accelerate launch train_dreambooth.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \ | |
| --resolution=64 \ | |
| --train_batch_size=4 \ | |
| --gradient_accumulation_steps=1 \ | |
| --learning_rate=1e-7 \ | |
| --max_train_steps=150 \ | |
| --validation_prompt <span class="hljs-string">"a photo of sks dog"</span> \ | |
| --validation_steps 25 \ | |
| --text_encoder_use_attention_mask \ | |
| --tokenizer_max_length 77 \ | |
| --pre_compute_text_embeddings \ | |
| --use_8bit_adam \ | |
| --set_grads_to_none \ | |
| --skip_save_text_encoder \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="if-stage-ii-full-dreambooth" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#if-stage-ii-full-dreambooth"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>IF Stage II Full Dreambooth</span></h3> <p data-svelte-h="svelte-1qe7hwo"><code>--learning_rate=5e-6</code>: With a smaller effective batch size of 4, we found that we required learning rates as low as | |
| 1e-8.</p> <p data-svelte-h="svelte-rh5mqd"><code>--resolution=256</code>: The upscaler expects higher resolution inputs</p> <p data-svelte-h="svelte-1acbypw"><code>--train_batch_size=2</code> and <code>--gradient_accumulation_steps=6</code>: We found that full training of stage II particularly with | |
| faces required large effective batch sizes.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"DeepFloyd/IF-II-L-v1.0"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"dog"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"dreambooth_dog_upscale"</span> | |
| <span class="hljs-built_in">export</span> VALIDATION_IMAGES=<span class="hljs-string">"dog_downsized/image_1.png dog_downsized/image_2.png dog_downsized/image_3.png dog_downsized/image_4.png"</span> | |
| accelerate launch train_dreambooth.py \ | |
| --report_to wandb \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --instance_prompt=<span class="hljs-string">"a sks dog"</span> \ | |
| --resolution=256 \ | |
| --train_batch_size=2 \ | |
| --gradient_accumulation_steps=6 \ | |
| --learning_rate=5e-6 \ | |
| --max_train_steps=2000 \ | |
| --validation_prompt=<span class="hljs-string">"a sks dog"</span> \ | |
| --validation_steps=150 \ | |
| --checkpointing_steps=500 \ | |
| --pre_compute_text_embeddings \ | |
| --tokenizer_max_length=77 \ | |
| --text_encoder_use_attention_mask \ | |
| --validation_images <span class="hljs-variable">$VALIDATION_IMAGES</span> \ | |
| --class_labels_conditioning timesteps \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="stable-diffusion-xl" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stable-diffusion-xl"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Stable Diffusion XL</span></h2> <p data-svelte-h="svelte-xyk1up">We support fine-tuning of the UNet and text encoders shipped in <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">Stable Diffusion XL</a> with DreamBooth and LoRA via the <code>train_dreambooth_lora_sdxl.py</code> script. Please refer to the docs <a href="https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_sdxl.md" rel="nofollow">here</a>.</p> <p></p> | |
| <script> | |
| { | |
| __sveltekit_ynvcvq = { | |
| assets: "/docs/diffusers/v0.22.2/en", | |
| base: "/docs/diffusers/v0.22.2/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/diffusers/v0.22.2/en/_app/immutable/entry/start.73ea8a3d.js"), | |
| import("/docs/diffusers/v0.22.2/en/_app/immutable/entry/app.60438fe3.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 139], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 132 kB
- Xet hash:
- e217628251bf4648d69e8c2b7294156aecb9c0f0e0a1e2e7108917536081871b
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.