Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Low-Rank Adaptation of Large Language Models (LoRA)","local":"low-rank-adaptation-of-large-language-models-lora","sections":[{"title":"Text-to-image","local":"text-to-image","sections":[{"title":"Training","local":"text-to-image-training","sections":[],"depth":3},{"title":"Inference","local":"text-to-image-inference","sections":[],"depth":3}],"depth":2},{"title":"DreamBooth","local":"dreambooth","sections":[{"title":"Training","local":"dreambooth-training","sections":[],"depth":3},{"title":"Inference","local":"dreambooth-inference","sections":[],"depth":3}],"depth":2},{"title":"Stable Diffusion XL","local":"stable-diffusion-xl","sections":[],"depth":2},{"title":"Unloading LoRA parameters","local":"unloading-lora-parameters","sections":[],"depth":2},{"title":"Fusing LoRA parameters","local":"fusing-lora-parameters","sections":[],"depth":2},{"title":"Unfusing LoRA parameters","local":"unfusing-lora-parameters","sections":[],"depth":2},{"title":"Working with different LoRA scales when using LoRA fusion","local":"working-with-different-lora-scales-when-using-lora-fusion","sections":[],"depth":2},{"title":"Serializing pipelines with fused LoRA parameters","local":"serializing-pipelines-with-fused-lora-parameters","sections":[],"depth":2},{"title":"Working with multiple LoRA checkpoints","local":"working-with-multiple-lora-checkpoints","sections":[],"depth":2},{"title":"Supporting different LoRA checkpoints from Diffusers","local":"supporting-different-lora-checkpoints-from-diffusers","sections":[{"title":"Kohya","local":"kohya","sections":[],"depth":3},{"title":"Kohya + Stable Diffusion XL","local":"kohya--stable-diffusion-xl","sections":[],"depth":3},{"title":"TheLastBen","local":"thelastben","sections":[],"depth":3}],"depth":2}],"depth":1}"> | |
| <link href="/docs/diffusers/v0.22.2/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/entry/start.73ea8a3d.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/scheduler.182ea377.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/singletons.60172a60.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/index.1f6d62f6.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/paths.49cddc6d.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/entry/app.60438fe3.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/index.abf12888.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/nodes/0.efabe74f.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/nodes/141.3248fb7a.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/Tip.230e2334.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/CodeBlock.57fe6e13.js"> | |
| <link rel="modulepreload" href="/docs/diffusers/v0.22.2/en/_app/immutable/chunks/Heading.16916d63.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Low-Rank Adaptation of Large Language Models (LoRA)","local":"low-rank-adaptation-of-large-language-models-lora","sections":[{"title":"Text-to-image","local":"text-to-image","sections":[{"title":"Training","local":"text-to-image-training","sections":[],"depth":3},{"title":"Inference","local":"text-to-image-inference","sections":[],"depth":3}],"depth":2},{"title":"DreamBooth","local":"dreambooth","sections":[{"title":"Training","local":"dreambooth-training","sections":[],"depth":3},{"title":"Inference","local":"dreambooth-inference","sections":[],"depth":3}],"depth":2},{"title":"Stable Diffusion XL","local":"stable-diffusion-xl","sections":[],"depth":2},{"title":"Unloading LoRA parameters","local":"unloading-lora-parameters","sections":[],"depth":2},{"title":"Fusing LoRA parameters","local":"fusing-lora-parameters","sections":[],"depth":2},{"title":"Unfusing LoRA parameters","local":"unfusing-lora-parameters","sections":[],"depth":2},{"title":"Working with different LoRA scales when using LoRA fusion","local":"working-with-different-lora-scales-when-using-lora-fusion","sections":[],"depth":2},{"title":"Serializing pipelines with fused LoRA parameters","local":"serializing-pipelines-with-fused-lora-parameters","sections":[],"depth":2},{"title":"Working with multiple LoRA checkpoints","local":"working-with-multiple-lora-checkpoints","sections":[],"depth":2},{"title":"Supporting different LoRA checkpoints from Diffusers","local":"supporting-different-lora-checkpoints-from-diffusers","sections":[{"title":"Kohya","local":"kohya","sections":[],"depth":3},{"title":"Kohya + Stable Diffusion XL","local":"kohya--stable-diffusion-xl","sections":[],"depth":3},{"title":"TheLastBen","local":"thelastben","sections":[],"depth":3}],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="low-rank-adaptation-of-large-language-models-lora" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#low-rank-adaptation-of-large-language-models-lora"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Low-Rank Adaptation of Large Language Models (LoRA)</span></h1> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-zcd81y">This is an experimental feature. Its APIs can change in future.</p></div> <p data-svelte-h="svelte-162fd4o"><a href="https://arxiv.org/abs/2106.09685" rel="nofollow">Low-Rank Adaptation of Large Language Models (LoRA)</a> is a training method that accelerates the training of large models while consuming less memory. It adds pairs of rank-decomposition weight matrices (called <strong>update matrices</strong>) to existing weights, and <strong>only</strong> trains those newly added weights. This has a couple of advantages:</p> <ul data-svelte-h="svelte-10kaohq"><li>Previous pretrained weights are kept frozen so the model is not as prone to <a href="https://www.pnas.org/doi/10.1073/pnas.1611835114" rel="nofollow">catastrophic forgetting</a>.</li> <li>Rank-decomposition matrices have significantly fewer parameters than the original model, which means that trained LoRA weights are easily portable.</li> <li>LoRA matrices are generally added to the attention layers of the original model. 🧨 Diffusers provides the <a href="/docs/diffusers/v0.22.2/en/api/loaders#diffusers.loaders.UNet2DConditionLoadersMixin.load_attn_procs">load_attn_procs()</a> method to load the LoRA weights into a model’s attention layers. You can control the extent to which the model is adapted toward new training images via a <code>scale</code> parameter.</li> <li>The greater memory-efficiency allows you to run fine-tuning on consumer GPUs like the Tesla T4, RTX 3080 or even the RTX 2080 Ti! GPUs like the T4 are free and readily accessible in Kaggle or Google Colab notebooks.</li></ul> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1eow968">💡 LoRA is not only limited to attention layers. The authors found that amending | |
| the attention layers of a language model is sufficient to obtain good downstream performance with great efficiency. This is why it’s common to just add the LoRA weights to the attention layers of a model. Check out the <a href="https://huggingface.co/blog/lora" rel="nofollow">Using LoRA for efficient Stable Diffusion fine-tuning</a> blog for more information about how LoRA works!</p></div> <p data-svelte-h="svelte-1mrdz4f"><a href="https://github.com/cloneofsimo" rel="nofollow">cloneofsimo</a> was the first to try out LoRA training for Stable Diffusion in the popular <a href="https://github.com/cloneofsimo/lora" rel="nofollow">lora</a> GitHub repository. 🧨 Diffusers now supports finetuning with LoRA for <a href="https://github.com/huggingface/diffusers/tree/main/examples/text_to_image#training-with-lora" rel="nofollow">text-to-image generation</a> and <a href="https://github.com/huggingface/diffusers/tree/main/examples/dreambooth#training-with-low-rank-adaptation-of-large-language-models-lora" rel="nofollow">DreamBooth</a>. This guide will show you how to do both.</p> <p data-svelte-h="svelte-g6vnky">If you’d like to store or share your model with the community, login to your Hugging Face account (create <a href="https://hf.co/join" rel="nofollow">one</a> if you don’t have one already):</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->huggingface-cli login<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="text-to-image" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#text-to-image"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Text-to-image</span></h2> <p data-svelte-h="svelte-10agb32">Finetuning a model like Stable Diffusion, which has billions of parameters, can be slow and difficult. With LoRA, it is much easier and faster to finetune a diffusion model. It can run on hardware with as little as 11GB of GPU RAM without resorting to tricks such as 8-bit optimizers.</p> <h3 class="relative group"><a id="text-to-image-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#text-to-image-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training</span></h3> <p data-svelte-h="svelte-1lh1npc">Let’s finetune <a href="https://huggingface.co/runwayml/stable-diffusion-v1-5" rel="nofollow"><code>stable-diffusion-v1-5</code></a> on the <a href="https://huggingface.co/datasets/lambdalabs/pokemon-blip-captions" rel="nofollow">Pokémon BLIP captions</a> dataset to generate your own Pokémon.</p> <p data-svelte-h="svelte-mnv49s">Specify the <code>MODEL_NAME</code> environment variable (either a Hub model repository id or a path to the directory containing the model weights) and pass it to the <a href="https://huggingface.co/docs/diffusers/en/api/diffusion_pipeline#diffusers.DiffusionPipeline.from_pretrained.pretrained_model_name_or_path" rel="nofollow"><code>pretrained_model_name_or_path</code></a> argument. You’ll also need to set the <code>DATASET_NAME</code> environment variable to the name of the dataset you want to train on. To use your own dataset, take a look at the <a href="create_dataset">Create a dataset for training</a> guide.</p> <p data-svelte-h="svelte-np14fc">The <code>OUTPUT_DIR</code> and <code>HUB_MODEL_ID</code> variables are optional and specify where to save the model to on the Hub:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"/sddata/finetune/lora/pokemon"</span> | |
| <span class="hljs-built_in">export</span> HUB_MODEL_ID=<span class="hljs-string">"pokemon-lora"</span> | |
| <span class="hljs-built_in">export</span> DATASET_NAME=<span class="hljs-string">"lambdalabs/pokemon-blip-captions"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-vjf3go">There are some flags to be aware of before you start training:</p> <ul data-svelte-h="svelte-11vsf6h"><li><code>--push_to_hub</code> stores the trained LoRA embeddings on the Hub.</li> <li><code>--report_to=wandb</code> reports and logs the training results to your Weights & Biases dashboard (as an example, take a look at this <a href="https://wandb.ai/pcuenq/text2image-fine-tune/runs/b4k1w0tn?workspace=user-pcuenq" rel="nofollow">report</a>).</li> <li><code>--learning_rate=1e-04</code>, you can afford to use a higher learning rate than you normally would with LoRA.</li></ul> <p data-svelte-h="svelte-1u3y683">Now you’re ready to launch the training (you can find the full training script <a href="https://github.com/huggingface/diffusers/blob/main/examples/text_to_image/train_text_to_image_lora.py" rel="nofollow">here</a>). Training takes about 5 hours on a 2080 Ti GPU with 11GB of RAM, and it’ll create and save model checkpoints and the <code>pytorch_lora_weights</code> in your repository.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch --mixed_precision=<span class="hljs-string">"fp16"</span> train_text_to_image_lora.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --dataset_name=<span class="hljs-variable">$DATASET_NAME</span> \ | |
| --dataloader_num_workers=8 \ | |
| --resolution=512 --center_crop --random_flip \ | |
| --train_batch_size=1 \ | |
| --gradient_accumulation_steps=4 \ | |
| --max_train_steps=15000 \ | |
| --learning_rate=1e-04 \ | |
| --max_grad_norm=1 \ | |
| --lr_scheduler=<span class="hljs-string">"cosine"</span> --lr_warmup_steps=0 \ | |
| --output_dir=<span class="hljs-variable">${OUTPUT_DIR}</span> \ | |
| --push_to_hub \ | |
| --hub_model_id=<span class="hljs-variable">${HUB_MODEL_ID}</span> \ | |
| --report_to=wandb \ | |
| --checkpointing_steps=500 \ | |
| --validation_prompt=<span class="hljs-string">"A pokemon with blue eyes."</span> \ | |
| --seed=1337<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="text-to-image-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#text-to-image-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference</span></h3> <p data-svelte-h="svelte-l009s5">Now you can use the model for inference by loading the base model in the <a href="/docs/diffusers/v0.22.2/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a> and then the <a href="/docs/diffusers/v0.22.2/en/api/schedulers/multistep_dpm_solver#diffusers.DPMSolverMultistepScheduler">DPMSolverMultistepScheduler</a>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline, DPMSolverMultistepScheduler | |
| <span class="hljs-meta">>>> </span>model_base = <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| <span class="hljs-meta">>>> </span>pipe = StableDiffusionPipeline.from_pretrained(model_base, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>) | |
| <span class="hljs-meta">>>> </span>pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1c0cjnc">Load the LoRA weights from your finetuned model <em>on top of the base model weights</em>, and then move the pipeline to a GPU for faster inference. When you merge the LoRA weights with the frozen pretrained model weights, you can optionally adjust how much of the weights to merge with the <code>scale</code> parameter:</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-17q39x7">💡 A <code>scale</code> value of <code>0</code> is the same as not using your LoRA weights and you’re only using the base model weights, and a <code>scale</code> value of <code>1</code> means you’re only using the fully finetuned LoRA weights. Values between <code>0</code> and <code>1</code> interpolates between the two weights.</p></div> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>pipe.unet.load_attn_procs(lora_model_path) | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># use half the weights from the LoRA finetuned model and half the weights from the base model</span> | |
| <span class="hljs-meta">>>> </span>image = pipe( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"A pokemon with blue eyes."</span>, num_inference_steps=<span class="hljs-number">25</span>, guidance_scale=<span class="hljs-number">7.5</span>, cross_attention_kwargs={<span class="hljs-string">"scale"</span>: <span class="hljs-number">0.5</span>} | |
| <span class="hljs-meta">... </span>).images[<span class="hljs-number">0</span>] | |
| <span class="hljs-comment"># use the weights from the fully finetuned LoRA model</span> | |
| <span class="hljs-meta">>>> </span>image = pipe(<span class="hljs-string">"A pokemon with blue eyes."</span>, num_inference_steps=<span class="hljs-number">25</span>, guidance_scale=<span class="hljs-number">7.5</span>).images[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>image.save(<span class="hljs-string">"blue_pokemon.png"</span>)<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1108nls">If you are loading the LoRA parameters from the Hub and if the Hub repository has | |
| a <code>base_model</code> tag (such as <a href="https://huggingface.co/sayakpaul/sd-model-finetuned-lora-t4/blob/main/README.md?code=true#L4" rel="nofollow">this</a>), then | |
| you can do:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub.repocard <span class="hljs-keyword">import</span> RepoCard | |
| lora_model_id = <span class="hljs-string">"sayakpaul/sd-model-finetuned-lora-t4"</span> | |
| card = RepoCard.load(lora_model_id) | |
| base_model_id = card.data.to_dict()[<span class="hljs-string">"base_model"</span>] | |
| pipe = StableDiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>) | |
| ...<!-- HTML_TAG_END --></pre></div></div> <h2 class="relative group"><a id="dreambooth" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dreambooth"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>DreamBooth</span></h2> <p data-svelte-h="svelte-kko4zn"><a href="https://arxiv.org/abs/2208.12242" rel="nofollow">DreamBooth</a> is a finetuning technique for personalizing a text-to-image model like Stable Diffusion to generate photorealistic images of a subject in different contexts, given a few images of the subject. However, DreamBooth is very sensitive to hyperparameters and it is easy to overfit. Some important hyperparameters to consider include those that affect the training time (learning rate, number of training steps), and inference time (number of steps, scheduler type).</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-9rn216">💡 Take a look at the <a href="https://huggingface.co/blog/dreambooth" rel="nofollow">Training Stable Diffusion with DreamBooth using 🧨 Diffusers</a> blog for an in-depth analysis of DreamBooth experiments and recommended settings.</p></div> <h3 class="relative group"><a id="dreambooth-training" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dreambooth-training"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training</span></h3> <p data-svelte-h="svelte-fgnnhh">Let’s finetune <a href="https://huggingface.co/runwayml/stable-diffusion-v1-5" rel="nofollow"><code>stable-diffusion-v1-5</code></a> with DreamBooth and LoRA with some 🐶 <a href="https://drive.google.com/drive/folders/1BO_dyz-p65qhBRRMRA4TbZ8qW4rB99JZ" rel="nofollow">dog images</a>. Download and save these images to a directory. To use your own dataset, take a look at the <a href="create_dataset">Create a dataset for training</a> guide.</p> <p data-svelte-h="svelte-1ep7b0t">To start, specify the <code>MODEL_NAME</code> environment variable (either a Hub model repository id or a path to the directory containing the model weights) and pass it to the <a href="https://huggingface.co/docs/diffusers/en/api/diffusion_pipeline#diffusers.DiffusionPipeline.from_pretrained.pretrained_model_name_or_path" rel="nofollow"><code>pretrained_model_name_or_path</code></a> argument. You’ll also need to set <code>INSTANCE_DIR</code> to the path of the directory containing the images.</p> <p data-svelte-h="svelte-foqllp">The <code>OUTPUT_DIR</code> variables is optional and specifies where to save the model to on the Hub:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-built_in">export</span> MODEL_NAME=<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| <span class="hljs-built_in">export</span> INSTANCE_DIR=<span class="hljs-string">"path-to-instance-images"</span> | |
| <span class="hljs-built_in">export</span> OUTPUT_DIR=<span class="hljs-string">"path-to-save-model"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-vjf3go">There are some flags to be aware of before you start training:</p> <ul data-svelte-h="svelte-11vsf6h"><li><code>--push_to_hub</code> stores the trained LoRA embeddings on the Hub.</li> <li><code>--report_to=wandb</code> reports and logs the training results to your Weights & Biases dashboard (as an example, take a look at this <a href="https://wandb.ai/pcuenq/text2image-fine-tune/runs/b4k1w0tn?workspace=user-pcuenq" rel="nofollow">report</a>).</li> <li><code>--learning_rate=1e-04</code>, you can afford to use a higher learning rate than you normally would with LoRA.</li></ul> <p data-svelte-h="svelte-1ohvlep">Now you’re ready to launch the training (you can find the full training script <a href="https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth_lora.py" rel="nofollow">here</a>). The script creates and saves model checkpoints and the <code>pytorch_lora_weights.bin</code> file in your repository.</p> <p data-svelte-h="svelte-13qtcom">It’s also possible to additionally fine-tune the text encoder with LoRA. This, in most cases, leads | |
| to better results with a slight increase in the compute. To allow fine-tuning the text encoder with LoRA, | |
| specify the <code>--train_text_encoder</code> while launching the <code>train_dreambooth_lora.py</code> script.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch train_dreambooth_lora.py \ | |
| --pretrained_model_name_or_path=<span class="hljs-variable">$MODEL_NAME</span> \ | |
| --instance_data_dir=<span class="hljs-variable">$INSTANCE_DIR</span> \ | |
| --output_dir=<span class="hljs-variable">$OUTPUT_DIR</span> \ | |
| --instance_prompt=<span class="hljs-string">"a photo of sks dog"</span> \ | |
| --resolution=512 \ | |
| --train_batch_size=1 \ | |
| --gradient_accumulation_steps=1 \ | |
| --checkpointing_steps=100 \ | |
| --learning_rate=1e-4 \ | |
| --report_to=<span class="hljs-string">"wandb"</span> \ | |
| --lr_scheduler=<span class="hljs-string">"constant"</span> \ | |
| --lr_warmup_steps=0 \ | |
| --max_train_steps=500 \ | |
| --validation_prompt=<span class="hljs-string">"A photo of sks dog in a bucket"</span> \ | |
| --validation_epochs=50 \ | |
| --seed=<span class="hljs-string">"0"</span> \ | |
| --push_to_hub<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="dreambooth-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dreambooth-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference</span></h3> <p data-svelte-h="svelte-27dazk">Now you can use the model for inference by loading the base model in the <a href="/docs/diffusers/v0.22.2/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline">StableDiffusionPipeline</a>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-meta">>>> </span>model_base = <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> | |
| <span class="hljs-meta">>>> </span>pipe = StableDiffusionPipeline.from_pretrained(model_base, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-x23tn5">Load the LoRA weights from your finetuned DreamBooth model <em>on top of the base model weights</em>, and then move the pipeline to a GPU for faster inference. When you merge the LoRA weights with the frozen pretrained model weights, you can optionally adjust how much of the weights to merge with the <code>scale</code> parameter:</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-17q39x7">💡 A <code>scale</code> value of <code>0</code> is the same as not using your LoRA weights and you’re only using the base model weights, and a <code>scale</code> value of <code>1</code> means you’re only using the fully finetuned LoRA weights. Values between <code>0</code> and <code>1</code> interpolates between the two weights.</p></div> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>pipe.unet.load_attn_procs(lora_model_path) | |
| <span class="hljs-meta">>>> </span>pipe.to(<span class="hljs-string">"cuda"</span>) | |
| <span class="hljs-comment"># use half the weights from the LoRA finetuned model and half the weights from the base model</span> | |
| <span class="hljs-meta">>>> </span>image = pipe( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"A picture of a sks dog in a bucket."</span>, | |
| <span class="hljs-meta">... </span> num_inference_steps=<span class="hljs-number">25</span>, | |
| <span class="hljs-meta">... </span> guidance_scale=<span class="hljs-number">7.5</span>, | |
| <span class="hljs-meta">... </span> cross_attention_kwargs={<span class="hljs-string">"scale"</span>: <span class="hljs-number">0.5</span>}, | |
| <span class="hljs-meta">... </span>).images[<span class="hljs-number">0</span>] | |
| <span class="hljs-comment"># use the weights from the fully finetuned LoRA model</span> | |
| <span class="hljs-meta">>>> </span>image = pipe(<span class="hljs-string">"A picture of a sks dog in a bucket."</span>, num_inference_steps=<span class="hljs-number">25</span>, guidance_scale=<span class="hljs-number">7.5</span>).images[<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>image.save(<span class="hljs-string">"bucket-dog.png"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1x0rrf1">If you used <code>--train_text_encoder</code> during training, then use <code>pipe.load_lora_weights()</code> to load the LoRA | |
| weights. For example:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub.repocard <span class="hljs-keyword">import</span> RepoCard | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| lora_model_id = <span class="hljs-string">"sayakpaul/dreambooth-text-encoder-test"</span> | |
| card = RepoCard.load(lora_model_id) | |
| base_model_id = card.data.to_dict()[<span class="hljs-string">"base_model"</span>] | |
| pipe = StableDiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>) | |
| pipe = pipe.to(<span class="hljs-string">"cuda"</span>) | |
| pipe.load_lora_weights(lora_model_id) | |
| image = pipe(<span class="hljs-string">"A picture of a sks dog in a bucket"</span>, num_inference_steps=<span class="hljs-number">25</span>).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-r01i5t">If your LoRA parameters involve the UNet as well as the Text Encoder, then passing | |
| <code>cross_attention_kwargs={"scale": 0.5}</code> will apply the <code>scale</code> value to both the UNet | |
| and the Text Encoder.</p></div> <p data-svelte-h="svelte-ju718z">Note that the use of <a href="/docs/diffusers/v0.22.2/en/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline.load_lora_weights">load_lora_weights()</a> is preferred to <a href="/docs/diffusers/v0.22.2/en/api/loaders#diffusers.loaders.UNet2DConditionLoadersMixin.load_attn_procs">load_attn_procs()</a> for loading LoRA parameters. This is because | |
| <a href="/docs/diffusers/v0.22.2/en/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline.load_lora_weights">load_lora_weights()</a> can handle the following situations:</p> <ul><li><p data-svelte-h="svelte-1fzrg1n">LoRA parameters that don’t have separate identifiers for the UNet and the text encoder (such as <a href="https://huggingface.co/patrickvonplaten/lora_dreambooth_dog_example" rel="nofollow"><code>"patrickvonplaten/lora_dreambooth_dog_example"</code></a>). So, you can just do:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pipe.load_lora_weights(lora_model_path)<!-- HTML_TAG_END --></pre></div></li> <li data-svelte-h="svelte-1smxhmx"><p>LoRA parameters that have separate identifiers for the UNet and the text encoder such as: <a href="https://huggingface.co/sayakpaul/dreambooth" rel="nofollow"><code>"sayakpaul/dreambooth"</code></a>.</p></li></ul> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-14f33g6">You can also provide a local directory path to <a href="/docs/diffusers/v0.22.2/en/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline.load_lora_weights">load_lora_weights()</a> as well as <a href="/docs/diffusers/v0.22.2/en/api/loaders#diffusers.loaders.UNet2DConditionLoadersMixin.load_attn_procs">load_attn_procs()</a>.</p></div> <h2 class="relative group"><a id="stable-diffusion-xl" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#stable-diffusion-xl"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Stable Diffusion XL</span></h2> <p data-svelte-h="svelte-xe4oxf">We support fine-tuning with <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">Stable Diffusion XL</a>. Please refer to the following docs:</p> <ul data-svelte-h="svelte-19vwamc"><li><a href="https://github.com/huggingface/diffusers/blob/main/examples/text_to_image/README_sdxl.md" rel="nofollow">text_to_image/README_sdxl.md</a></li> <li><a href="https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/README_sdxl.md" rel="nofollow">dreambooth/README_sdxl.md</a></li></ul> <h2 class="relative group"><a id="unloading-lora-parameters" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#unloading-lora-parameters"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Unloading LoRA parameters</span></h2> <p data-svelte-h="svelte-1u7alvh">You can call <a href="/docs/diffusers/v0.22.2/en/api/loaders#diffusers.loaders.LoraLoaderMixin.unload_lora_weights">unload_lora_weights()</a> on a pipeline to unload the LoRA parameters.</p> <h2 class="relative group"><a id="fusing-lora-parameters" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#fusing-lora-parameters"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Fusing LoRA parameters</span></h2> <p data-svelte-h="svelte-i6qm0r">You can call <a href="/docs/diffusers/v0.22.2/en/api/loaders#diffusers.loaders.LoraLoaderMixin.fuse_lora">fuse_lora()</a> on a pipeline to merge the LoRA parameters with the original parameters of the underlying model(s). This can lead to a potential speedup in the inference latency.</p> <h2 class="relative group"><a id="unfusing-lora-parameters" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#unfusing-lora-parameters"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Unfusing LoRA parameters</span></h2> <p data-svelte-h="svelte-1ihu1nb">To undo <code>fuse_lora</code>, call <a href="/docs/diffusers/v0.22.2/en/api/loaders#diffusers.loaders.LoraLoaderMixin.unfuse_lora">unfuse_lora()</a> on a pipeline.</p> <h2 class="relative group"><a id="working-with-different-lora-scales-when-using-lora-fusion" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#working-with-different-lora-scales-when-using-lora-fusion"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Working with different LoRA scales when using LoRA fusion</span></h2> <p data-svelte-h="svelte-1739hjj">If you need to use <code>scale</code> when working with <code>fuse_lora()</code> to control the influence of the LoRA parameters on the outputs, you should specify <code>lora_scale</code> within <code>fuse_lora()</code>. Passing the <code>scale</code> parameter to <code>cross_attention_kwargs</code> when you call the pipeline won’t work.</p> <p data-svelte-h="svelte-rw21g0">To use a different <code>lora_scale</code> with <code>fuse_lora()</code>, you should first call <code>unfuse_lora()</code> on the corresponding pipeline and call <code>fuse_lora()</code> again with the expected <code>lora_scale</code>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| lora_model_id = <span class="hljs-string">"hf-internal-testing/sdxl-1.0-lora"</span> | |
| lora_filename = <span class="hljs-string">"sd_xl_offset_example-lora_1.0.safetensors"</span> | |
| pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) | |
| <span class="hljs-comment"># This uses a default `lora_scale` of 1.0.</span> | |
| pipe.fuse_lora() | |
| generator = torch.manual_seed(<span class="hljs-number">0</span>) | |
| images_fusion = pipe( | |
| <span class="hljs-string">"masterpiece, best quality, mountain"</span>, generator=generator, num_inference_steps=<span class="hljs-number">2</span> | |
| ).images | |
| <span class="hljs-comment"># To work with a different `lora_scale`, first reverse the effects of `fuse_lora()`.</span> | |
| pipe.unfuse_lora() | |
| <span class="hljs-comment"># Then proceed as follows.</span> | |
| pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) | |
| pipe.fuse_lora(lora_scale=<span class="hljs-number">0.5</span>) | |
| generator = torch.manual_seed(<span class="hljs-number">0</span>) | |
| images_fusion = pipe( | |
| <span class="hljs-string">"masterpiece, best quality, mountain"</span>, generator=generator, num_inference_steps=<span class="hljs-number">2</span> | |
| ).images<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="serializing-pipelines-with-fused-lora-parameters" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#serializing-pipelines-with-fused-lora-parameters"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Serializing pipelines with fused LoRA parameters</span></h2> <p data-svelte-h="svelte-scdyk5">Let’s say you want to load the pipeline above that has its UNet fused with the LoRA parameters. You can easily do so by simply calling the <code>save_pretrained()</code> method on <code>pipe</code>.</p> <p data-svelte-h="svelte-1ktiavg">After loading the LoRA parameters into a pipeline, if you want to serialize the pipeline such that the affected model components are already fused with the LoRA parameters, you should:</p> <ul data-svelte-h="svelte-lf7at2"><li>call <code>fuse_lora()</code> on the pipeline with the desired <code>lora_scale</code>, given you’ve already loaded the LoRA parameters into it.</li> <li>call <code>save_pretrained()</code> on the pipeline.</li></ul> <p data-svelte-h="svelte-1plkp10">Here is a complete example:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| lora_model_id = <span class="hljs-string">"hf-internal-testing/sdxl-1.0-lora"</span> | |
| lora_filename = <span class="hljs-string">"sd_xl_offset_example-lora_1.0.safetensors"</span> | |
| pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) | |
| <span class="hljs-comment"># First, fuse the LoRA parameters.</span> | |
| pipe.fuse_lora() | |
| <span class="hljs-comment"># Then save.</span> | |
| pipe.save_pretrained(<span class="hljs-string">"my-pipeline-with-fused-lora"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-17ca37g">Now, you can load the pipeline and directly perform inference without having to load the LoRA parameters again:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipe = DiffusionPipeline.from_pretrained(<span class="hljs-string">"my-pipeline-with-fused-lora"</span>, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| generator = torch.manual_seed(<span class="hljs-number">0</span>) | |
| images_fusion = pipe( | |
| <span class="hljs-string">"masterpiece, best quality, mountain"</span>, generator=generator, num_inference_steps=<span class="hljs-number">2</span> | |
| ).images<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="working-with-multiple-lora-checkpoints" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#working-with-multiple-lora-checkpoints"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Working with multiple LoRA checkpoints</span></h2> <p data-svelte-h="svelte-1vg1fxr">With the <code>fuse_lora()</code> method as described above, it’s possible to load multiple LoRA checkpoints. Let’s work through a complete example. First we load the base pipeline:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionXLPipeline, AutoencoderKL | |
| <span class="hljs-keyword">import</span> torch | |
| vae = AutoencoderKL.from_pretrained(<span class="hljs-string">"madebyollin/sdxl-vae-fp16-fix"</span>, torch_dtype=torch.float16) | |
| pipe = StableDiffusionXLPipeline.from_pretrained( | |
| <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>, | |
| vae=vae, | |
| torch_dtype=torch.float16, | |
| ) | |
| pipe.to(<span class="hljs-string">"cuda"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-va7o95">Then let’s two LoRA checkpoints and fuse them with specific <code>lora_scale</code> values:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># LoRA one.</span> | |
| pipe.load_lora_weights(<span class="hljs-string">"goofyai/cyborg_style_xl"</span>) | |
| pipe.fuse_lora(lora_scale=<span class="hljs-number">0.7</span>) | |
| <span class="hljs-comment"># LoRA two.</span> | |
| pipe.load_lora_weights(<span class="hljs-string">"TheLastBen/Pikachu_SDXL"</span>) | |
| pipe.fuse_lora(lora_scale=<span class="hljs-number">0.7</span>)<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-ldu6p">Play with the <code>lora_scale</code> parameter when working with multiple LoRAs to control the amount of their influence on the final outputs.</p></div> <p data-svelte-h="svelte-d3kqb7">Let’s see them in action:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt = <span class="hljs-string">"cyborg style pikachu"</span> | |
| image = pipe(prompt, num_inference_steps=<span class="hljs-number">30</span>, guidance_scale=<span class="hljs-number">7.5</span>).images[<span class="hljs-number">0</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-fjwyvv"><img src="https://huggingface.co/datasets/diffusers/docs-images/resolve/main/cyborg_pikachu.png" alt="cyborg_pikachu"></p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-nyydsa">Currently, unfusing multiple LoRA checkpoints is not possible.</p></div> <h2 class="relative group"><a id="supporting-different-lora-checkpoints-from-diffusers" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#supporting-different-lora-checkpoints-from-diffusers"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Supporting different LoRA checkpoints from Diffusers</span></h2> <p data-svelte-h="svelte-1nkpd1u">🤗 Diffusers supports loading checkpoints from popular LoRA trainers such as <a href="https://github.com/kohya-ss/sd-scripts/" rel="nofollow">Kohya</a> and <a href="https://github.com/TheLastBen/fast-stable-diffusion" rel="nofollow">TheLastBen</a>. In this section, we outline the current API’s details and limitations.</p> <h3 class="relative group"><a id="kohya" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#kohya"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Kohya</span></h3> <p data-svelte-h="svelte-12q1ys0">This support was made possible because of the amazing contributors: <a href="https://github.com/takuma104" rel="nofollow">@takuma104</a> and <a href="https://github.com/isidentical" rel="nofollow">@isidentical</a>.</p> <p data-svelte-h="svelte-1dpmid3">We support loading Kohya LoRA checkpoints using <a href="/docs/diffusers/v0.22.2/en/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline.load_lora_weights">load_lora_weights()</a>. In this section, we explain how to load such a checkpoint from <a href="https://civitai.com/" rel="nofollow">CivitAI</a> | |
| in Diffusers and perform inference with it.</p> <p data-svelte-h="svelte-gud49k">First, download a checkpoint. We’ll use | |
| <a href="https://civitai.com/models/13239/light-and-shadow" rel="nofollow">this one</a> for demonstration purposes.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->wget https://civitai.com/api/download/models/15603 -O light_and_shadow.safetensors<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-apspcj">Next, we initialize a <a href="/docs/diffusers/v0.22.2/en/api/pipelines/overview#diffusers.DiffusionPipeline">~DiffusionPipeline</a>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline, DPMSolverMultistepScheduler | |
| pipeline = StableDiffusionPipeline.from_pretrained( | |
| <span class="hljs-string">"gsdf/Counterfeit-V2.5"</span>, torch_dtype=torch.float16, safety_checker=<span class="hljs-literal">None</span>, use_safetensors=<span class="hljs-literal">True</span> | |
| ).to(<span class="hljs-string">"cuda"</span>) | |
| pipeline.scheduler = DPMSolverMultistepScheduler.from_config( | |
| pipeline.scheduler.config, use_karras_sigmas=<span class="hljs-literal">True</span> | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-15lsk7k">We then load the checkpoint downloaded from CivitAI:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pipeline.load_lora_weights(<span class="hljs-string">"."</span>, weight_name=<span class="hljs-string">"light_and_shadow.safetensors"</span>)<!-- HTML_TAG_END --></pre></div> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-gba3ir">If you’re loading a checkpoint in the <code>safetensors</code> format, please ensure you have <code>safetensors</code> installed.</p></div> <p data-svelte-h="svelte-3vmmy1">And then it’s time for running inference:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->prompt = <span class="hljs-string">"masterpiece, best quality, 1girl, at dusk"</span> | |
| negative_prompt = (<span class="hljs-string">"(low quality, worst quality:1.4), (bad anatomy), (inaccurate limb:1.2), "</span> | |
| <span class="hljs-string">"bad composition, inaccurate eyes, extra digit, fewer digits, (extra arms:1.2), large breasts"</span>) | |
| images = pipeline(prompt=prompt, | |
| negative_prompt=negative_prompt, | |
| width=<span class="hljs-number">512</span>, | |
| height=<span class="hljs-number">768</span>, | |
| num_inference_steps=<span class="hljs-number">15</span>, | |
| num_images_per_prompt=<span class="hljs-number">4</span>, | |
| generator=torch.manual_seed(<span class="hljs-number">0</span>) | |
| ).images<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-5o1ip6">Below is a comparison between the LoRA and the non-LoRA results:</p> <p data-svelte-h="svelte-5g4zu7"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/lora_non_lora_comparison.png" alt="lora_non_lora"></p> <p data-svelte-h="svelte-1jxoesr">You have a similar checkpoint stored on the Hugging Face Hub, you can load it | |
| directly with <a href="/docs/diffusers/v0.22.2/en/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusionInpaintPipeline.load_lora_weights">load_lora_weights()</a> like so:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->lora_model_id = <span class="hljs-string">"sayakpaul/civitai-light-shadow-lora"</span> | |
| lora_filename = <span class="hljs-string">"light_and_shadow.safetensors"</span> | |
| pipeline.load_lora_weights(lora_model_id, weight_name=lora_filename)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="kohya--stable-diffusion-xl" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#kohya--stable-diffusion-xl"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Kohya + Stable Diffusion XL</span></h3> <p data-svelte-h="svelte-11mak8l">After the release of <a href="https://huggingface.co/papers/2307.01952" rel="nofollow">Stable Diffusion XL</a>, the community contributed some amazing LoRA checkpoints trained on top of it with the Kohya trainer.</p> <p data-svelte-h="svelte-7e6ei7">Here are some example checkpoints we tried out:</p> <ul data-svelte-h="svelte-ybml08"><li>SDXL 0.9:<ul><li><a href="https://civitai.com/models/22279?modelVersionId=118556" rel="nofollow">https://civitai.com/models/22279?modelVersionId=118556</a></li> <li><a href="https://civitai.com/models/104515/sdxlor30costumesrevue-starlight-saijoclaudine-lora" rel="nofollow">https://civitai.com/models/104515/sdxlor30costumesrevue-starlight-saijoclaudine-lora</a></li> <li><a href="https://civitai.com/models/108448/daiton-sdxl-test" rel="nofollow">https://civitai.com/models/108448/daiton-sdxl-test</a></li> <li><a href="https://filebin.net/2ntfqqnapiu9q3zx/pixelbuildings128-v1.safetensors" rel="nofollow">https://filebin.net/2ntfqqnapiu9q3zx/pixelbuildings128-v1.safetensors</a></li></ul></li> <li>SDXL 1.0:<ul><li><a href="https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_offset_example-lora_1.0.safetensors" rel="nofollow">https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/blob/main/sd_xl_offset_example-lora_1.0.safetensors</a></li></ul></li></ul> <p data-svelte-h="svelte-1rpkybg">Here is an example of how to perform inference with these checkpoints in <code>diffusers</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| base_model_id = <span class="hljs-string">"stabilityai/stable-diffusion-xl-base-0.9"</span> | |
| pipeline = DiffusionPipeline.from_pretrained(base_model_id, torch_dtype=torch.float16).to(<span class="hljs-string">"cuda"</span>) | |
| pipeline.load_lora_weights(<span class="hljs-string">"."</span>, weight_name=<span class="hljs-string">"Kamepan.safetensors"</span>) | |
| prompt = <span class="hljs-string">"anime screencap, glint, drawing, best quality, light smile, shy, a full body of a girl wearing wedding dress in the middle of the forest beneath the trees, fireflies, big eyes, 2d, cute, anime girl, waifu, cel shading, magical girl, vivid colors, (outline:1.1), manga anime artstyle, masterpiece, official wallpaper, glint <lora:kame_sdxl_v2:1>"</span> | |
| negative_prompt = <span class="hljs-string">"(deformed, bad quality, sketch, depth of field, blurry:1.1), grainy, bad anatomy, bad perspective, old, ugly, realistic, cartoon, disney, bad proportions"</span> | |
| generator = torch.manual_seed(<span class="hljs-number">2947883060</span>) | |
| num_inference_steps = <span class="hljs-number">30</span> | |
| guidance_scale = <span class="hljs-number">7</span> | |
| image = pipeline( | |
| prompt=prompt, negative_prompt=negative_prompt, num_inference_steps=num_inference_steps, | |
| generator=generator, guidance_scale=guidance_scale | |
| ).images[<span class="hljs-number">0</span>] | |
| image.save(<span class="hljs-string">"Kamepan.png"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1bce4zd"><code>Kamepan.safetensors</code> comes from <a href="https://civitai.com/models/22279?modelVersionId=118556" rel="nofollow">https://civitai.com/models/22279?modelVersionId=118556</a> .</p> <p data-svelte-h="svelte-v0ield">If you notice carefully, the inference UX is exactly identical to what we presented in the sections above.</p> <p data-svelte-h="svelte-t4r5z0">Thanks to <a href="https://github.com/isidentical" rel="nofollow">@isidentical</a> for helping us on integrating this feature.</p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-8p7pue"><strong>Known limitations specific to the Kohya LoRAs</strong>:</p> <ul data-svelte-h="svelte-au0p3i"><li>When images don’t looks similar to other UIs, such as ComfyUI, it can be because of multiple reasons, as explained <a href="https://github.com/huggingface/diffusers/pull/4287/#issuecomment-1655110736" rel="nofollow">here</a>.</li> <li>We don’t fully support <a href="https://github.com/KohakuBlueleaf/LyCORIS" rel="nofollow">LyCORIS checkpoints</a>. To the best of our knowledge, our current <code>load_lora_weights()</code> should support LyCORIS checkpoints that have LoRA and LoCon modules but not the other ones, such as Hada, LoKR, etc.</li></ul></div> <h3 class="relative group"><a id="thelastben" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#thelastben"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>TheLastBen</span></h3> <p data-svelte-h="svelte-1240csr">Here is an example:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> DiffusionPipeline | |
| <span class="hljs-keyword">import</span> torch | |
| pipeline_id = <span class="hljs-string">"Lykon/dreamshaper-xl-1-0"</span> | |
| pipe = DiffusionPipeline.from_pretrained(pipeline_id, torch_dtype=torch.float16) | |
| pipe.enable_model_cpu_offload() | |
| lora_model_id = <span class="hljs-string">"TheLastBen/Papercut_SDXL"</span> | |
| lora_filename = <span class="hljs-string">"papercut.safetensors"</span> | |
| pipe.load_lora_weights(lora_model_id, weight_name=lora_filename) | |
| prompt = <span class="hljs-string">"papercut sonic"</span> | |
| image = pipe(prompt=prompt, num_inference_steps=<span class="hljs-number">20</span>, generator=torch.manual_seed(<span class="hljs-number">0</span>)).images[<span class="hljs-number">0</span>] | |
| image<!-- HTML_TAG_END --></pre></div> <p></p> | |
| <script> | |
| { | |
| __sveltekit_ynvcvq = { | |
| assets: "/docs/diffusers/v0.22.2/en", | |
| base: "/docs/diffusers/v0.22.2/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/diffusers/v0.22.2/en/_app/immutable/entry/start.73ea8a3d.js"), | |
| import("/docs/diffusers/v0.22.2/en/_app/immutable/entry/app.60438fe3.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 141], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 101 kB
- Xet hash:
- 509f28aa0df7c7e0e981d9b35a9f1e948e08e443b9b02227c938003c578165af
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.