Buckets:

hf-doc-build
/

doc

Files

xet

hf-doc-build/doc / diffusers /v0.10.2 /en /training /dreambooth.html

rtrm

about 2 months ago

download

raw

38 kB

	<meta charset="utf-8" /><meta http-equiv="content-security-policy" content=""><meta name="hf:doc:metadata" content="{&quot;local&quot;:&quot;dreambooth-finetuning-example&quot;,&quot;sections&quot;:[{&quot;local&quot;:&quot;training-locally&quot;,&quot;sections&quot;:[{&quot;local&quot;:&quot;installing-the-dependencies&quot;,&quot;title&quot;:&quot;Installing the dependencies&quot;},{&quot;local&quot;:&quot;dog-toy-example&quot;,&quot;title&quot;:&quot;Dog toy example&quot;},{&quot;local&quot;:&quot;training-with-a-priorpreserving-loss&quot;,&quot;title&quot;:&quot;Training with a prior-preserving loss&quot;},{&quot;local&quot;:&quot;training-on-a-16gb-gpu&quot;,&quot;title&quot;:&quot;Training on a 16GB GPU&quot;},{&quot;local&quot;:&quot;finetune-the-text-encoder-in-addition-to-the-unet&quot;,&quot;title&quot;:&quot;Fine-tune the text encoder in addition to the UNet&quot;},{&quot;local&quot;:&quot;training-on-a-8-gb-gpu&quot;,&quot;title&quot;:&quot;Training on a 8 GB GPU:&quot;}],&quot;title&quot;:&quot;Training locally &quot;},{&quot;local&quot;:&quot;inference&quot;,&quot;title&quot;:&quot;Inference&quot;}],&quot;title&quot;:&quot;DreamBooth fine-tuning example&quot;}" data-svelte="svelte-1phssyn">
	<link rel="modulepreload" href="/docs/diffusers/v0.10.2/en/_app/assets/pages/__layout.svelte-hf-doc-builder.css">
	<link rel="modulepreload" href="/docs/diffusers/v0.10.2/en/_app/start-hf-doc-builder.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.10.2/en/_app/chunks/vendor-hf-doc-builder.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.10.2/en/_app/chunks/paths-hf-doc-builder.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.10.2/en/_app/pages/__layout.svelte-hf-doc-builder.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.10.2/en/_app/pages/training/dreambooth.mdx-hf-doc-builder.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.10.2/en/_app/chunks/Tip-hf-doc-builder.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.10.2/en/_app/chunks/IconCopyLink-hf-doc-builder.js">
	<link rel="modulepreload" href="/docs/diffusers/v0.10.2/en/_app/chunks/CodeBlock-hf-doc-builder.js">






	<h1 class="relative group"><a id="dreambooth-finetuning-example" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dreambooth-finetuning-example"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>DreamBooth fine-tuning example
	</span></h1>

	<p><a href="https://arxiv.org/abs/2208.12242" rel="nofollow">DreamBooth</a> is a method to personalize text-to-image models like Stable Diffusion given just a few (3–5) images of a subject.</p>
	<p><img src="https://dreambooth.github.io/DreamBooth_files/teaser_static.jpg" alt="Dreambooth examples from the project's blog">
	<em>Dreambooth examples from the <a href="https://dreambooth.github.io" rel="nofollow">project’s blog</a>.</em></p>
	<p>The <a href="https://github.com/huggingface/diffusers/tree/main/examples/dreambooth" rel="nofollow">Dreambooth training script</a> shows how to implement this training procedure on a pre-trained Stable Diffusion model.</p>


	<div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400">
	<p>Dreambooth fine-tuning is very sensitive to hyperparameters and easy to overfit. We recommend you take a look at our <a href="https://huggingface.co/blog/dreambooth" rel="nofollow">in-depth analysis</a> with recommended settings for different subjects, and go from there.</p></div>
	<h2 class="relative group"><a id="training-locally" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-locally"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Training locally
	</span></h2>

	<h3 class="relative group"><a id="installing-the-dependencies" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#installing-the-dependencies"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Installing the dependencies
	</span></h3>

	<p>Before running the scripts, make sure to install the library’s training dependencies. We also recommend installing <code>diffusers</code> from the <code>main</code> GitHub branch.</p>

	<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
	<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
	Copied</div></button></div>
	<pre><!-- HTML_TAG_START -->pip install git+https://github.com/huggingface/diffusers
	pip install -U -r diffusers/examples/dreambooth/requirements.txt<!-- HTML_TAG_END --></pre></div>
	<p>Then initialize and configure a <a href="https://github.com/huggingface/accelerate/" rel="nofollow">🤗 Accelerate</a> environment with:</p>

	<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
	<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
	Copied</div></button></div>
	<pre><!-- HTML_TAG_START -->accelerate config<!-- HTML_TAG_END --></pre></div>
	<p>You need to accept the model license before downloading or using the weights. In this example we’ll use model version <code>v1-4</code>, so you’ll need to visit <a href="https://huggingface.co/CompVis/stable-diffusion-v1-4" rel="nofollow">its card</a>, read the license and tick the checkbox if you agree. </p>
	<p>You have to be a registered user in 🤗 Hugging Face Hub, and you’ll also need to use an access token for the code to work. For more information on access tokens, please refer to <a href="https://huggingface.co/docs/hub/security-tokens" rel="nofollow">this section of the documentation</a>.</p>
	<p>Run the following command to authenticate your token:</p>

	<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
	<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
	Copied</div></button></div>
	<pre><!-- HTML_TAG_START -->huggingface-cli login<!-- HTML_TAG_END --></pre></div>
	<p>If you have already cloned the repo, then you won’t need to go through these steps. Instead, you can pass the path to your local checkout to the training script and it will be loaded from there.</p>
	<h3 class="relative group"><a id="dog-toy-example" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dog-toy-example"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Dog toy example
	</span></h3>

	<p>In this example we’ll use <a href="https://drive.google.com/drive/folders/1BO_dyz-p65qhBRRMRA4TbZ8qW4rB99JZ" rel="nofollow">these images</a> to add a new concept to Stable Diffusion using the Dreambooth process. They will be our training data. Please download them and save them somewhere on your system.</p>
	<p>Then you can launch the training script using:</p>

	<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
	<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
	Copied</div></button></div>
	<pre><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span>
	<span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"path_to_training_images"</span>
	<span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path_to_saved_model"</span>

	accelerate launch train_dreambooth.py \
	--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \
	--instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \
	--output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \
	--instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \
	--resolution=512 \
	--train_batch_size=1 \
	--gradient_accumulation_steps=1 \
	--learning_rate=5e-6 \
	--lr_scheduler=<span class="hljs-string">"constant"</span> \
	--lr_warmup_steps=0 \
	--max_train_steps=400<!-- HTML_TAG_END --></pre></div>
	<h3 class="relative group"><a id="training-with-a-priorpreserving-loss" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-with-a-priorpreserving-loss"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Training with a prior-preserving loss
	</span></h3>

	<p>Prior preservation is used to avoid overfitting and language drift. Please refer to the paper to learn more about it if you are interested. For prior preservation, we use other images of the same class as part of the training process. The nice thing is that we can generate those images using the Stable Diffusion model itself! The training script will save the generated images to a local path we specify.</p>
	<p>According to the paper, it’s recommended to generate <code>num_epochs * num_samples</code> images for prior preservation. 200-300 works well for most cases.</p>

	<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
	<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
	Copied</div></button></div>
	<pre><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span>
	<span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"path_to_training_images"</span>
	<span class="hljs-built_in">export</span> CLASS_DIR=<span class="hljs-string">"path_to_class_images"</span>
	<span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path_to_saved_model"</span>

	accelerate launch train_dreambooth.py \
	--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \
	--instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \
	--class_data_dir=<span class="hljs-variable">$CLASS_DIR</span> \
	--output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \
	--with_prior_preservation --prior_loss_weight=1.0 \
	--instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \
	--class_prompt=<span class="hljs-string">"a photo of dog"</span> \
	--resolution=512 \
	--train_batch_size=1 \
	--gradient_accumulation_steps=1 \
	--learning_rate=5e-6 \
	--lr_scheduler=<span class="hljs-string">"constant"</span> \
	--lr_warmup_steps=0 \
	--num_class_images=200 \
	--max_train_steps=800<!-- HTML_TAG_END --></pre></div>
	<h3 class="relative group"><a id="training-on-a-16gb-gpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-on-a-16gb-gpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Training on a 16GB GPU
	</span></h3>

	<p>With the help of gradient checkpointing and the 8-bit optimizer from <a href="https://github.com/TimDettmers/bitsandbytes" rel="nofollow">bitsandbytes</a>, it’s possible to train dreambooth on a 16GB GPU.</p>

	<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
	<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
	Copied</div></button></div>
	<pre><!-- HTML_TAG_START -->pip install bitsandbytes<!-- HTML_TAG_END --></pre></div>
	<p>Then pass the <code>--use_8bit_adam</code> option to the training script.</p>

	<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
	<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
	Copied</div></button></div>
	<pre><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span>
	<span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"path_to_training_images"</span>
	<span class="hljs-built_in">export</span> CLASS_DIR=<span class="hljs-string">"path_to_class_images"</span>
	<span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path_to_saved_model"</span>

	accelerate launch train_dreambooth.py \
	--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \
	--instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \
	--class_data_dir=<span class="hljs-variable">$CLASS_DIR</span> \
	--output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \
	--with_prior_preservation --prior_loss_weight=1.0 \
	--instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \
	--class_prompt=<span class="hljs-string">"a photo of dog"</span> \
	--resolution=512 \
	--train_batch_size=1 \
	--gradient_accumulation_steps=2 --gradient_checkpointing \
	--use_8bit_adam \
	--learning_rate=5e-6 \
	--lr_scheduler=<span class="hljs-string">"constant"</span> \
	--lr_warmup_steps=0 \
	--num_class_images=200 \
	--max_train_steps=800<!-- HTML_TAG_END --></pre></div>
	<h3 class="relative group"><a id="finetune-the-text-encoder-in-addition-to-the-unet" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#finetune-the-text-encoder-in-addition-to-the-unet"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Fine-tune the text encoder in addition to the UNet
	</span></h3>

	<p>The script also allows you to fine-tune the <code>text_encoder</code> along with the <code>unet</code>. It has been observed experimentally that this gives much better results, especially on faces. Please refer to <a href="https://huggingface.co/blog/dreambooth" rel="nofollow">our blog</a> for more details.</p>
	<p>To enable this option, pass the <code>--train_text_encoder</code> argument to the training script.</p>


	<div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400">Training the text encoder requires additional memory, so training won't fit on a 16GB GPU. You'll need at least 24GB VRAM to use this option.
	</div>

	<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
	<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
	Copied</div></button></div>
	<pre><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span>
	<span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"path_to_training_images"</span>
	<span class="hljs-built_in">export</span> CLASS_DIR=<span class="hljs-string">"path_to_class_images"</span>
	<span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path_to_saved_model"</span>

	accelerate launch train_dreambooth.py \
	--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \
	--train_text_encoder \
	--instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \
	--class_data_dir=<span class="hljs-variable">$CLASS_DIR</span> \
	--output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \
	--with_prior_preservation --prior_loss_weight=1.0 \
	--instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \
	--class_prompt=<span class="hljs-string">"a photo of dog"</span> \
	--resolution=512 \
	--train_batch_size=1 \
	--use_8bit_adam \
	--gradient_checkpointing \
	--learning_rate=2e-6 \
	--lr_scheduler=<span class="hljs-string">"constant"</span> \
	--lr_warmup_steps=0 \
	--num_class_images=200 \
	--max_train_steps=800<!-- HTML_TAG_END --></pre></div>
	<h3 class="relative group"><a id="training-on-a-8-gb-gpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-on-a-8-gb-gpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Training on an 8 GB GPU
	</span></h3>

	<p>Using <a href="https://www.deepspeed.ai/" rel="nofollow">DeepSpeed</a> it’s even possible to offload some
	tensors from VRAM to either CPU or NVME, allowing training to proceed with less GPU memory.</p>
	<p>DeepSpeed needs to be enabled with <code>accelerate config</code>. During configuration,
	answer yes to “Do you want to use DeepSpeed?”. Combining DeepSpeed stage 2, fp16
	mixed precision, and offloading both the model parameters and the optimizer state to CPU, it’s
	possible to train on under 8 GB VRAM. The drawback is that this requires more system RAM (about 25 GB). See <a href="https://huggingface.co/docs/accelerate/usage_guides/deepspeed" rel="nofollow">the DeepSpeed documentation</a> for more configuration options.</p>
	<p>Changing the default Adam optimizer to DeepSpeed’s special version of Adam
	<code>deepspeed.ops.adam.DeepSpeedCPUAdam</code> gives a substantial speedup, but enabling
	it requires the system’s CUDA toolchain version to be the same as the one installed with PyTorch. 8-bit optimizers don’t seem to be compatible with DeepSpeed at the moment.</p>

	<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
	<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
	Copied</div></button></div>
	<pre><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"CompVis/stable-diffusion-v1-4"</span>
	<span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"path_to_training_images"</span>
	<span class="hljs-built_in">export</span> CLASS_DIR=<span class="hljs-string">"path_to_class_images"</span>
	<span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path_to_saved_model"</span>

	accelerate launch train_dreambooth.py \
	--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \
	--instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \
	--class_data_dir=<span class="hljs-variable">$CLASS_DIR</span> \
	--output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \
	--with_prior_preservation --prior_loss_weight=1.0 \
	--instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \
	--class_prompt=<span class="hljs-string">"a photo of dog"</span> \
	--resolution=512 \
	--train_batch_size=1 \
	--sample_batch_size=1 \
	--gradient_accumulation_steps=1 --gradient_checkpointing \
	--learning_rate=5e-6 \
	--lr_scheduler=<span class="hljs-string">"constant"</span> \
	--lr_warmup_steps=0 \
	--num_class_images=200 \
	--max_train_steps=800 \
	--mixed_precision=fp16<!-- HTML_TAG_END --></pre></div>
	<h2 class="relative group"><a id="inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
	<span>Inference
	</span></h2>

	<p>Once you have trained a model, inference can be done using the <code>StableDiffusionPipeline</code>, by simply indicating the path where the model was saved. Make sure that your prompts include the special <code>identifier</code> used during training (<code>sks</code> in the previous examples).</p>

	<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
	<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
	Copied</div></button></div>
	<pre><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline
	<span class="hljs-keyword">import</span> torch

	model_id = <span class="hljs-string">"path_to_saved_model"</span>
	pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>)

	prompt = <span class="hljs-string">"A photo of sks dog in a bucket"</span>
	image = pipe(prompt, num_inference_steps=<span class="hljs-number">50</span>, guidance_scale=<span class="hljs-number">7.5</span>).images[<span class="hljs-number">0</span>]

	image.save(<span class="hljs-string">"dog-bucket.png"</span>)<!-- HTML_TAG_END --></pre></div>


	<script type="module" data-hydrate="1941ea6">
	// Page bootstrap: load the doc-builder `start` entry point and invoke it with
	// the configuration for this page.
	import { start } from "/docs/diffusers/v0.10.2/en/_app/start-hf-doc-builder.js";
	start({
	// Mount target: the parent node of the element carrying
	// data-hydrate="1941ea6" (the enclosing script tag has that attribute).
	target: document.querySelector('[data-hydrate="1941ea6"]').parentNode,
	// Both the base path and the assets path point at the versioned docs root.
	paths: {"base":"/docs/diffusers/v0.10.2/en","assets":"/docs/diffusers/v0.10.2/en"},
	session: {},
	// NOTE(review): `route` and `spa` are both false; presumably this turns off
	// client-side routing / SPA mode — confirm against the start module.
	route: false,
	spa: false,
	trailing_slash: "never",
	// Hydration payload: a status of 200 with no error, plus the modules to load.
	hydrate: {
	status: 200,
	error: null,
	// Dynamic imports, in order: the shared layout module, then this page's
	// module (training/dreambooth).
	nodes: [
	import("/docs/diffusers/v0.10.2/en/_app/pages/__layout.svelte-hf-doc-builder.js"),
	import("/docs/diffusers/v0.10.2/en/_app/pages/training/dreambooth.mdx-hf-doc-builder.js")
	],
	params: {}
	}
	});
	</script>

Xet Storage Details

Size: 38 kB
Xet hash: d352603071c144942c86500e2103cc1de7cfd963833e2607d25f9093408ac8a5

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.