Buckets:

rtrm's picture
download
raw
39.2 kB
<meta charset="utf-8" /><meta http-equiv="content-security-policy" content=""><meta name="hf:doc:metadata" content="{&quot;local&quot;:&quot;lowrank-adaptation-of-large-language-models-lora&quot;,&quot;sections&quot;:[{&quot;local&quot;:&quot;texttoimage&quot;,&quot;sections&quot;:[{&quot;local&quot;:&quot;text-to-image-training&quot;,&quot;title&quot;:&quot;Training&quot;},{&quot;local&quot;:&quot;text-to-image-inference&quot;,&quot;title&quot;:&quot;Inference&quot;}],&quot;title&quot;:&quot;Text-to-image&quot;},{&quot;local&quot;:&quot;dreambooth&quot;,&quot;sections&quot;:[{&quot;local&quot;:&quot;dreambooth-training&quot;,&quot;title&quot;:&quot;Training&quot;},{&quot;local&quot;:&quot;dreambooth-inference&quot;,&quot;title&quot;:&quot;Inference&quot;}],&quot;title&quot;:&quot;DreamBooth&quot;}],&quot;title&quot;:&quot;Low-Rank Adaptation of Large Language Models (LoRA)&quot;}" data-svelte="svelte-1phssyn">
<link rel="modulepreload" href="/docs/diffusers/v0.16.0/en/_app/assets/pages/__layout.svelte-hf-doc-builder.css">
<link rel="modulepreload" href="/docs/diffusers/v0.16.0/en/_app/start-hf-doc-builder.js">
<link rel="modulepreload" href="/docs/diffusers/v0.16.0/en/_app/chunks/vendor-hf-doc-builder.js">
<link rel="modulepreload" href="/docs/diffusers/v0.16.0/en/_app/chunks/paths-hf-doc-builder.js">
<link rel="modulepreload" href="/docs/diffusers/v0.16.0/en/_app/pages/__layout.svelte-hf-doc-builder.js">
<link rel="modulepreload" href="/docs/diffusers/v0.16.0/en/_app/pages/training/lora.mdx-hf-doc-builder.js">
<link rel="modulepreload" href="/docs/diffusers/v0.16.0/en/_app/chunks/Tip-hf-doc-builder.js">
<link rel="modulepreload" href="/docs/diffusers/v0.16.0/en/_app/chunks/IconCopyLink-hf-doc-builder.js">
<link rel="modulepreload" href="/docs/diffusers/v0.16.0/en/_app/chunks/CodeBlock-hf-doc-builder.js">
<link rel="modulepreload" href="/docs/diffusers/v0.16.0/en/_app/chunks/DocNotebookDropdown-hf-doc-builder.js">
<h1 class="relative group"><a id="lowrank-adaptation-of-large-language-models-lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#lowrank-adaptation-of-large-language-models-lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Low-Rank Adaptation of Large Language Models (LoRA)
</span></h1>
<div class="flex space-x-1 absolute z-10 right-0 top-0">
<div class="relative colab-dropdown ">
<button class=" " type="button">
<img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg">
</button>
</div>
<div class="relative colab-dropdown ">
<button class=" " type="button">
<img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg">
</button>
</div></div>
<div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p>Currently, LoRA is only supported for the attention layers of the <code>UNet2DConditionalModel</code>. We also
support LoRA fine-tuning of the text encoder for DreamBooth in a limited capacity. For more details on how we support
LoRA fine-tuning of the text encoder, refer to the discussion on <a href="https://github.com/huggingface/diffusers/pull/2918" rel="nofollow">this PR</a>. </p></div>
<p><a href="https://arxiv.org/abs/2106.09685" rel="nofollow">Low-Rank Adaptation of Large Language Models (LoRA)</a> is a training method that accelerates the training of large models while consuming less memory. It adds pairs of rank-decomposition weight matrices (called <strong>update matrices</strong>) to existing weights, and <strong>only</strong> trains those newly added weights. This has a couple of advantages:</p>
<ul><li>Previous pretrained weights are kept frozen so the model is not as prone to <a href="https://www.pnas.org/doi/10.1073/pnas.1611835114" rel="nofollow">catastrophic forgetting</a>.</li>
<li>Rank-decomposition matrices have significantly fewer parameters than the original model, which means that trained LoRA weights are easily portable.</li>
<li>LoRA matrices are generally added to the attention layers of the original model. 🧨 Diffusers provides the <a href="/docs/diffusers/v0.16.0/en/api/loaders#diffusers.loaders.UNet2DConditionLoadersMixin.load_attn_procs">load_attn_procs()</a> method to load the LoRA weights into a model’s attention layers. You can control the extent to which the model is adapted toward new training images via a <code>scale</code> parameter. </li>
<li>The greater memory-efficiency allows you to run fine-tuning on consumer GPUs like the Tesla T4, RTX 3080 or even the RTX 2080 Ti! GPUs like the T4 are free and readily accessible in Kaggle or Google Colab notebooks.</li></ul>
<div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p>💡 LoRA is not only limited to attention layers. The authors found that amending
the attention layers of a language model is sufficient to obtain good downstream performance with great efficiency. This is why it’s common to just add the LoRA weights to the attention layers of a model. Check out the <a href="https://huggingface.co/blog/lora" rel="nofollow">Using LoRA for efficient Stable Diffusion fine-tuning</a> blog for more information about how LoRA works!</p></div>
<p><a href="https://github.com/cloneofsimo" rel="nofollow">cloneofsimo</a> was the first to try out LoRA training for Stable Diffusion in the popular <a href="https://github.com/cloneofsimo/lora" rel="nofollow">lora</a> GitHub repository. 🧨 Diffusers now supports finetuning with LoRA for <a href="https://github.com/huggingface/diffusers/tree/main/examples/text_to_image#training-with-lora" rel="nofollow">text-to-image generation</a> and <a href="https://github.com/huggingface/diffusers/tree/main/examples/dreambooth#training-with-low-rank-adaptation-of-large-language-models-lora" rel="nofollow">DreamBooth</a>. This guide will show you how to do both.</p>
<p>If you’d like to store or share your model with the community, login to your Hugging Face account (create <a href="hf.co/join">one</a> if you don’t have one already):</p>
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START -->huggingface-cli login<!-- HTML_TAG_END --></pre></div>
<h2 class="relative group"><a id="texttoimage" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#texttoimage"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Text-to-image
</span></h2>
<p>Finetuning a model like Stable Diffusion, which has billions of parameters, can be slow and difficult. With LoRA, it is much easier and faster to finetune a diffusion model. It can run on hardware with as little as 11GB of GPU RAM without resorting to tricks such as 8-bit optimizers.</p>
<h3 class="relative group"><a id="text-to-image-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#text-to-image-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Training
</span></h3>
<p>Let’s finetune <a href="https://huggingface.co/runwayml/stable-diffusion-v1-5" rel="nofollow"><code>stable-diffusion-v1-5</code></a> on the <a href="https://huggingface.co/datasets/lambdalabs/pokemon-blip-captions" rel="nofollow">Pokémon BLIP captions</a> dataset to generate your own Pokémon.</p>
<p>Specify the <code>MODEL_NAME</code> environment variable (either a Hub model repository id or a path to the directory containing the model weights) and pass it to the <code>~diffusers.DiffusionPipeline.from_pretrained.pretrained_model_name_or_path</code> argument. You’ll also need to set the <code>DATASET_NAME</code> environment variable to the name of the dataset you want to train on.</p>
<p>The <code>OUTPUT_DIR</code> and <code>HUB_MODEL_ID</code> variables are optional and specify where to save the model to on the Hub:</p>
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>
<span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">&quot;/sddata/finetune/lora/pokemon&quot;</span>
<span class="hljs-built_in">export</span> HUB_MODEL_ID=<span class="hljs-string">&quot;pokemon-lora&quot;</span>
<span class="hljs-built_in">export</span> DATASET_NAME=<span class="hljs-string">&quot;lambdalabs/pokemon-blip-captions&quot;</span><!-- HTML_TAG_END --></pre></div>
<p>There are some flags to be aware of before you start training:</p>
<ul><li><code>--push_to_hub</code> stores the trained LoRA embeddings on the Hub.</li>
<li><code>--report_to=wandb</code> reports and logs the training results to your Weights &amp; Biases dashboard (as an example, take a look at this <a href="https://wandb.ai/pcuenq/text2image-fine-tune/runs/b4k1w0tn?workspace=user-pcuenq" rel="nofollow">report</a>).</li>
<li><code>--learning_rate=1e-04</code>, you can afford to use a higher learning rate than you normally would with LoRA.</li></ul>
<p>Now you’re ready to launch the training (you can find the full training script <a href="https://github.com/huggingface/diffusers/blob/main/examples/text_to_image/train_text_to_image_lora.py" rel="nofollow">here</a>):</p>
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START -->accelerate launch --mixed_precision=<span class="hljs-string">&quot;fp16&quot;</span> train_text_to_image_lora.py \
--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \
--dataset_name=<span class="hljs-variable">$DATASET_NAME</span> \
--dataloader_num_workers=8 \
--resolution=512 --center_crop --random_flip \
--train_batch_size=1 \
--gradient_accumulation_steps=4 \
--max_train_steps=15000 \
--learning_rate=1e-04 \
--max_grad_norm=1 \
--lr_scheduler=<span class="hljs-string">&quot;cosine&quot;</span> --lr_warmup_steps=0 \
--output_dir=<span class="hljs-variable">${OUTPUT_DIR}</span> \
--push_to_hub \
--hub_model_id=<span class="hljs-variable">${HUB_MODEL_ID}</span> \
--report_to=wandb \
--checkpointing_steps=500 \
--validation_prompt=<span class="hljs-string">&quot;A pokemon with blue eyes.&quot;</span> \
--seed=1337<!-- HTML_TAG_END --></pre></div>
<h3 class="relative group"><a id="text-to-image-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#text-to-image-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Inference
</span></h3>
<p>Now you can use the model for inference by loading the base model in the <a href="/docs/diffusers/v0.16.0/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a> and then the <a href="/docs/diffusers/v0.16.0/en/api/schedulers/multistep_dpm_solver#diffusers.DPMSolverMultistepScheduler">DPMSolverMultistepScheduler</a>:</p>
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline, DPMSolverMultistepScheduler
<span class="hljs-meta">&gt;&gt;&gt; </span>model_base = <span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe = StableDiffusionPipeline.from_pretrained(model_base, torch_dtype=torch.float16)
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)<!-- HTML_TAG_END --></pre></div>
<p>Load the LoRA weights from your finetuned model <em>on top of the base model weights</em>, and then move the pipeline to a GPU for faster inference. When you merge the LoRA weights with the frozen pretrained model weights, you can optionally adjust how much of the weights to merge with the <code>scale</code> parameter:</p>
<div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p>💡 A <code>scale</code> value of <code>0</code> is the same as not using your LoRA weights and you’re only using the base model weights, and a <code>scale</code> value of <code>1</code> means you’re only using the fully finetuned LoRA weights. Values between <code>0</code> and <code>1</code> interpolates between the two weights.</p></div>
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>pipe.unet.load_attn_procs(model_path)
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe.to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># use half the weights from the LoRA finetuned model and half the weights from the base model</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>image = pipe(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;A pokemon with blue eyes.&quot;</span>, num_inference_steps=<span class="hljs-number">25</span>, guidance_scale=<span class="hljs-number">7.5</span>, cross_attention_kwargs={<span class="hljs-string">&quot;scale&quot;</span>: <span class="hljs-number">0.5</span>}
<span class="hljs-meta">... </span>).images[<span class="hljs-number">0</span>]
<span class="hljs-comment"># use the weights from the fully finetuned LoRA model</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>image = pipe(<span class="hljs-string">&quot;A pokemon with blue eyes.&quot;</span>, num_inference_steps=<span class="hljs-number">25</span>, guidance_scale=<span class="hljs-number">7.5</span>).images[<span class="hljs-number">0</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>image.save(<span class="hljs-string">&quot;blue_pokemon.png&quot;</span>)<!-- HTML_TAG_END --></pre></div>
<h2 class="relative group"><a id="dreambooth" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dreambooth"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>DreamBooth
</span></h2>
<p><a href="https://arxiv.org/abs/2208.12242" rel="nofollow">DreamBooth</a> is a finetuning technique for personalizing a text-to-image model like Stable Diffusion to generate photorealistic images of a subject in different contexts, given a few images of the subject. However, DreamBooth is very sensitive to hyperparameters and it is easy to overfit. Some important hyperparameters to consider include those that affect the training time (learning rate, number of training steps), and inference time (number of steps, scheduler type).</p>
<div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p>💡 Take a look at the <a href="https://huggingface.co/blog/dreambooth" rel="nofollow">Training Stable Diffusion with DreamBooth using 🧨 Diffusers</a> blog for an in-depth analysis of DreamBooth experiments and recommended settings.</p></div>
<h3 class="relative group"><a id="dreambooth-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dreambooth-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Training
</span></h3>
<p>Let’s finetune <a href="https://huggingface.co/runwayml/stable-diffusion-v1-5" rel="nofollow"><code>stable-diffusion-v1-5</code></a> with DreamBooth and LoRA with some 🐶 <a href="https://drive.google.com/drive/folders/1BO_dyz-p65qhBRRMRA4TbZ8qW4rB99JZ" rel="nofollow">dog images</a>. Download and save these images to a directory.</p>
<p>To start, specify the <code>MODEL_NAME</code> environment variable (either a Hub model repository id or a path to the directory containing the model weights) and pass it to the <code>~diffusers.DiffusionPipeline.from_pretrained.pretrained_model_name_or_path</code> argument. You’ll also need to set <code>INSTANCE_DIR</code> to the path of the directory containing the images. </p>
<p>The <code>OUTPUT_DIR</code> variables is optional and specifies where to save the model to on the Hub:</p>
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>
<span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">&quot;path-to-instance-images&quot;</span>
<span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">&quot;path-to-save-model&quot;</span><!-- HTML_TAG_END --></pre></div>
<p>There are some flags to be aware of before you start training:</p>
<ul><li><code>--push_to_hub</code> stores the trained LoRA embeddings on the Hub.</li>
<li><code>--report_to=wandb</code> reports and logs the training results to your Weights &amp; Biases dashboard (as an example, take a look at this <a href="https://wandb.ai/pcuenq/text2image-fine-tune/runs/b4k1w0tn?workspace=user-pcuenq" rel="nofollow">report</a>).</li>
<li><code>--learning_rate=1e-04</code>, you can afford to use a higher learning rate than you normally would with LoRA.</li></ul>
<p>Now you’re ready to launch the training (you can find the full training script <a href="https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth_lora.py" rel="nofollow">here</a>):</p>
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START -->accelerate launch train_dreambooth_lora.py \
--pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \
--instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \
--output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \
--instance_prompt=<span class="hljs-string">&quot;a photo of sks dog&quot;</span> \
--resolution=512 \
--train_batch_size=1 \
--gradient_accumulation_steps=1 \
--checkpointing_steps=100 \
--learning_rate=1e-4 \
--report_to=<span class="hljs-string">&quot;wandb&quot;</span> \
--lr_scheduler=<span class="hljs-string">&quot;constant&quot;</span> \
--lr_warmup_steps=0 \
--max_train_steps=500 \
--validation_prompt=<span class="hljs-string">&quot;A photo of sks dog in a bucket&quot;</span> \
--validation_epochs=50 \
--seed=<span class="hljs-string">&quot;0&quot;</span> \
--push_to_hub<!-- HTML_TAG_END --></pre></div>
<p>It’s also possible to additionally fine-tune the text encoder with LoRA. This, in most cases, leads
to better results with a slight increase in the compute. To allow fine-tuning the text encoder with LoRA,
specify the <code>--train_text_encoder</code> while launching the <code>train_dreambooth_lora.py</code> script. </p>
<h3 class="relative group"><a id="dreambooth-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dreambooth-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a>
<span>Inference
</span></h3>
<p>Now you can use the model for inference by loading the base model in the <a href="/docs/diffusers/v0.16.0/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a>:</p>
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>model_base = <span class="hljs-string">&quot;runwayml/stable-diffusion-v1-5&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe = StableDiffusionPipeline.from_pretrained(model_base, torch_dtype=torch.float16)<!-- HTML_TAG_END --></pre></div>
<p>Load the LoRA weights from your finetuned DreamBooth model <em>on top of the base model weights</em>, and then move the pipeline to a GPU for faster inference. When you merge the LoRA weights with the frozen pretrained model weights, you can optionally adjust how much of the weights to merge with the <code>scale</code> parameter:</p>
<div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p>💡 A <code>scale</code> value of <code>0</code> is the same as not using your LoRA weights and you’re only using the base model weights, and a <code>scale</code> value of <code>1</code> means you’re only using the fully finetuned LoRA weights. Values between <code>0</code> and <code>1</code> interpolates between the two weights.</p></div>
<div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg>
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div>
Copied</div></button></div>
<pre><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>pipe.unet.load_attn_procs(model_path)
<span class="hljs-meta">&gt;&gt;&gt; </span>pipe.to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-comment"># use half the weights from the LoRA finetuned model and half the weights from the base model</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>image = pipe(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;A picture of a sks dog in a bucket.&quot;</span>,
<span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">25</span>,
<span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">7.5</span>,
<span class="hljs-meta">... </span> cross_attention_kwargs={<span class="hljs-string">&quot;scale&quot;</span>: <span class="hljs-number">0.5</span>},
<span class="hljs-meta">... </span>).images[<span class="hljs-number">0</span>]
<span class="hljs-comment"># use the weights from the fully finetuned LoRA model</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>image = pipe(<span class="hljs-string">&quot;A picture of a sks dog in a bucket.&quot;</span>, num_inference_steps=<span class="hljs-number">25</span>, guidance_scale=<span class="hljs-number">7.5</span>).images[<span class="hljs-number">0</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>image.save(<span class="hljs-string">&quot;bucket-dog.png&quot;</span>)<!-- HTML_TAG_END --></pre></div>
<script type="module" data-hydrate="7wr7ih">
import { start } from "/docs/diffusers/v0.16.0/en/_app/start-hf-doc-builder.js";
start({
target: document.querySelector('[data-hydrate="7wr7ih"]').parentNode,
paths: {"base":"/docs/diffusers/v0.16.0/en","assets":"/docs/diffusers/v0.16.0/en"},
session: {},
route: false,
spa: false,
trailing_slash: "never",
hydrate: {
status: 200,
error: null,
nodes: [
import("/docs/diffusers/v0.16.0/en/_app/pages/__layout.svelte-hf-doc-builder.js"),
import("/docs/diffusers/v0.16.0/en/_app/pages/training/lora.mdx-hf-doc-builder.js")
],
params: {}
}
});
</script>

Xet Storage Details

Size:
39.2 kB
·
Xet hash:
6481d7a20c3a1ed2c53ad75de0168373c29c9553f432f9b68cb045a9f426854e

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.