Buckets:
hf-doc-build/doc / diffusers /main /en /_app /pages /using-diffusers /textual_inversion_inference.mdx-hf-doc-builder.js
/**
 * Compiled Svelte page component for the diffusers "Textual inversion"
 * inference guide (the `data-svelte` selector in `l()` marks it as Svelte output).
 *
 * Exports:
 *   - default:  the page component class (`Rl`)
 *   - metadata: the table-of-contents descriptor (`Bl`)
 *
 * NOTE(review): this is generated output. Content fixes made here should also be
 * applied to the source document it was built from, or they will be lost on the
 * next build.
 */
import {
  S as kl,
  i as Zl,
  s as Cl,
  e as i,
  k as c,
  w as M,
  t as a,
  M as xl,
  c as r,
  d as t,
  m as f,
  a as p,
  x as h,
  h as n,
  b as u,
  N as Wl,
  G as l,
  g as o,
  y,
  L as Xl,
  q as b,
  o as w,
  B as U,
  v as Il,
} from "../../chunks/vendor-hf-doc-builder.js";
import { I as Ht } from "../../chunks/IconCopyLink-hf-doc-builder.js";
import { C as k } from "../../chunks/CodeBlock-hf-doc-builder.js";
import { D as $l } from "../../chunks/DocNotebookDropdown-hf-doc-builder.js";

// Class list shared by the three heading anchor links.
const HEADER_LINK_CLASS =
  "header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full";

// Text-node contents, keyed by the fragment variable that holds the node.
// `c()` and `l()` both read from this table so the two code paths always use
// exactly the same strings.
const TEXT = {
  at: "Textual inversion",
  nt: "The ",
  ot: "StableDiffusionPipeline",
  it: " supports textual inversion, a technique that enables a model like Stable Diffusion to learn a new concept from just a few sample images. This gives you more control over the generated images and allows you to tailor the model towards specific concepts. You can get started quickly with a collection of community created concepts in the ",
  rt: "Stable Diffusion Conceptualizer",
  pt: ".",
  ct: "This guide will show you how to run inference with textual inversion using a pre-learned concept from the Stable Diffusion Conceptualizer. If you\u2019re interested in teaching a model new concepts with textual inversion, take a look at the ",
  ft: "Textual Inversion",
  ut: " training guide.",
  dt: "Login to your Hugging Face account:",
  mt: "Import the necessary libraries:",
  ht: "Stable Diffusion 1 and 2",
  yt: "Pick a Stable Diffusion checkpoint and a pre-learned concept from the ",
  bt: "Stable Diffusion Conceptualizer",
  wt: ":",
  Ut: "Now you can load a pipeline, and pass the pre-learned concept to it:",
  gt: "Create a prompt with the pre-learned concept by using the special placeholder token ",
  _t: "<cat-toy>",
  vt: ", and choose the number of samples and rows of images you\u2019d like to generate:",
  Jt: "Then run the pipeline (feel free to adjust the parameters like ",
  jt: "num_inference_steps",
  Tt: " and ",
  kt: "guidance_scale",
  Zt: " to see how they affect image quality), save the generated images and visualize them with the helper function you created at the beginning:",
  xt: "Stable Diffusion XL",
  Wt: "Stable Diffusion XL (SDXL) can also use textual inversion vectors for inference. In contrast to Stable Diffusion 1 and 2, SDXL has two text encoders so you\u2019ll need two textual inversion embeddings - one for each text encoder model.",
  // Fixed: possessive "its", not the contraction "it's".
  Xt: "Let\u2019s download the SDXL textual inversion embeddings and have a closer look at its structure:",
  It: "There are two tensors, ",
  // Fixed: the state dict keys shown in the sample output are `clip_g` / `clip_l`
  // (underscore), and the loading sample indexes state_dict["clip_g"].
  $t: '"clip_g"',
  St: " and ",
  Bt: '"clip_l"',
  Et: ".\n",
  Qt: '"clip_g"',
  Dt: " corresponds to the bigger text encoder in SDXL and refers to\n",
  Gt: "pipe.text_encoder_2",
  Nt: " and ",
  Rt: '"clip_l"',
  At: " refers to ",
  Yt: "pipe.text_encoder",
  zt: ".",
  Lt: "Now you can load each tensor separately by passing them along with the correct text encoder and tokenizer\nto ",
  Ft: "load_textual_inversion()",
  Vt: ":",
};

/**
 * Creates the page fragment: instantiates the child components (heading link
 * icons, notebook dropdown, code blocks) and returns the fragment object whose
 * methods build, claim, mount and tear down the page's DOM.
 *
 * Each CodeBlock receives `code` (a base64 payload that decodes to the
 * URL-encoded sample) and `highlighted` (the same sample with `hljs-*` span
 * markup). The line breaks and indentation in `highlighted` mirror the decoded
 * `code` payload.
 *
 * @param {*} qt - Unused by this fragment.
 * @returns {{c: Function, l: Function, h: Function, m: Function, p: Function,
 *            i: Function, o: Function, d: Function}} the fragment object.
 */
function Sl(qt) {
  let v, ve, J, Z, ie, S, st, re, at, Je, B, je, g, nt, P, ot, it, E, rt, pt, Te, C, ct, O, ft, ut, ke, K, dt, Ze, Q, Ce, ee, mt, xe, D, We, j, x, pe, G, Mt, ce, ht, Xe, W, yt, N, bt, wt, Ie, R, $e, te, Ut, Se, A, Be, X, gt, fe, _t, vt, Ee, Y, Qe, _, Jt, ue, jt, Tt, de, kt, Zt, De, z, Ge, L, me, Pt, Ne, T, I, Me, F, Ct, he, xt, Re, le, Wt, Ae, se, Xt, Ye, V, ze, H, Le, d, It, ye, $t, St, be, Bt, Et, we, Qt, Dt, Ue, Gt, Nt, ge, Rt, At, _e, Yt, zt, Fe, $, Lt, ae, Ft, Vt, Ve, q, He;

  S = new Ht({});
  B = new $l({
    props: {
      classNames: "absolute z-10 right-0 top-0",
      options: [
        { label: "Mixed", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers_doc/en/textual_inversion_inference.ipynb" },
        { label: "PyTorch", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers_doc/en/pytorch/textual_inversion_inference.ipynb" },
        { label: "TensorFlow", value: "https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers_doc/en/tensorflow/textual_inversion_inference.ipynb" },
        { label: "Mixed", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/diffusers_doc/en/textual_inversion_inference.ipynb" },
        { label: "PyTorch", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/diffusers_doc/en/pytorch/textual_inversion_inference.ipynb" },
        { label: "TensorFlow", value: "https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/diffusers_doc/en/tensorflow/textual_inversion_inference.ipynb" },
      ],
    },
  });
  Q = new k({
    props: {
      code: "ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMG5vdGVib29rX2xvZ2luJTBBJTBBbm90ZWJvb2tfbG9naW4oKQ==",
      highlighted: `<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login

notebook_login()`,
    },
  });
  D = new k({
    props: {
      code: "aW1wb3J0JTIwb3MlMEFpbXBvcnQlMjB0b3JjaCUwQSUwQWltcG9ydCUyMFBJTCUwQWZyb20lMjBQSUwlMjBpbXBvcnQlMjBJbWFnZSUwQSUwQWZyb20lMjBkaWZmdXNlcnMlMjBpbXBvcnQlMjBTdGFibGVEaWZmdXNpb25QaXBlbGluZSUwQWZyb20lMjBkaWZmdXNlcnMudXRpbHMlMjBpbXBvcnQlMjBtYWtlX2ltYWdlX2dyaWQlMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQ0xJUEZlYXR1cmVFeHRyYWN0b3IlMkMlMjBDTElQVGV4dE1vZGVsJTJDJTIwQ0xJUFRva2VuaXplcg==",
      highlighted: `<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">import</span> torch

<span class="hljs-keyword">import</span> PIL
<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image

<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> StableDiffusionPipeline
<span class="hljs-keyword">from</span> diffusers.utils <span class="hljs-keyword">import</span> make_image_grid
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer`,
    },
  });
  G = new Ht({});
  R = new k({
    props: {
      code: "cHJldHJhaW5lZF9tb2RlbF9uYW1lX29yX3BhdGglMjAlM0QlMjAlMjJydW53YXltbCUyRnN0YWJsZS1kaWZmdXNpb24tdjEtNSUyMiUwQXJlcG9faWRfZW1iZWRzJTIwJTNEJTIwJTIyc2QtY29uY2VwdHMtbGlicmFyeSUyRmNhdC10b3klMjI=",
      highlighted: `pretrained_model_name_or_path = <span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span>
repo_id_embeds = <span class="hljs-string">"sd-concepts-library/cat-toy"</span>`,
    },
  });
  A = new k({
    props: {
      code: "cGlwZWxpbmUlMjAlM0QlMjBTdGFibGVEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwcHJldHJhaW5lZF9tb2RlbF9uYW1lX29yX3BhdGglMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYlMkMlMjB1c2Vfc2FmZXRlbnNvcnMlM0RUcnVlJTBBKS50byglMjJjdWRhJTIyKSUwQSUwQXBpcGVsaW5lLmxvYWRfdGV4dHVhbF9pbnZlcnNpb24ocmVwb19pZF9lbWJlZHMp",
      highlighted: `pipeline = StableDiffusionPipeline.from_pretrained(
    pretrained_model_name_or_path, torch_dtype=torch.float16, use_safetensors=<span class="hljs-literal">True</span>
).to(<span class="hljs-string">"cuda"</span>)

pipeline.load_textual_inversion(repo_id_embeds)`,
    },
  });
  Y = new k({
    props: {
      code: "cHJvbXB0JTIwJTNEJTIwJTIyYSUyMGdyYWZpdHRpJTIwaW4lMjBhJTIwZmF2ZWxhJTIwd2FsbCUyMHdpdGglMjBhJTIwJTNDY2F0LXRveSUzRSUyMG9uJTIwaXQlMjIlMEElMEFudW1fc2FtcGxlcyUyMCUzRCUyMDIlMEFudW1fcm93cyUyMCUzRCUyMDI=",
      // The placeholder token is written with HTML entities: this string carries
      // span markup, so it is presumably injected as HTML by CodeBlock, where a
      // raw "<cat-toy>" would be parsed as an (invisible) element.
      highlighted: `prompt = <span class="hljs-string">"a grafitti in a favela wall with a &lt;cat-toy&gt; on it"</span>

num_samples = <span class="hljs-number">2</span>
num_rows = <span class="hljs-number">2</span>`,
    },
  });
  // NOTE(review): this sample calls `pipe(...)`, but the earlier sample binds the
  // loaded pipeline to `pipeline`. Left unchanged because a fix must also
  // re-encode the base64 `code` payload; correct it in the source document.
  z = new k({
    props: {
      code: "YWxsX2ltYWdlcyUyMCUzRCUyMCU1QiU1RCUwQWZvciUyMF8lMjBpbiUyMHJhbmdlKG51bV9yb3dzKSUzQSUwQSUyMCUyMCUyMCUyMGltYWdlcyUyMCUzRCUyMHBpcGUocHJvbXB0JTJDJTIwbnVtX2ltYWdlc19wZXJfcHJvbXB0JTNEbnVtX3NhbXBsZXMlMkMlMjBudW1faW5mZXJlbmNlX3N0ZXBzJTNENTAlMkMlMjBndWlkYW5jZV9zY2FsZSUzRDcuNSkuaW1hZ2VzJTBBJTIwJTIwJTIwJTIwYWxsX2ltYWdlcy5leHRlbmQoaW1hZ2VzKSUwQSUwQWdyaWQlMjAlM0QlMjBtYWtlX2ltYWdlX2dyaWQoYWxsX2ltYWdlcyUyQyUyMG51bV9zYW1wbGVzJTJDJTIwbnVtX3Jvd3MpJTBBZ3JpZA==",
      highlighted: `all_images = []
<span class="hljs-keyword">for</span> _ <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(num_rows):
    images = pipe(prompt, num_images_per_prompt=num_samples, num_inference_steps=<span class="hljs-number">50</span>, guidance_scale=<span class="hljs-number">7.5</span>).images
    all_images.extend(images)

grid = make_image_grid(all_images, num_samples, num_rows)
grid`,
    },
  });
  F = new Ht({});
  V = new k({
    props: {
      code: "ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMGhmX2h1Yl9kb3dubG9hZCUwQWZyb20lMjBzYWZldGVuc29ycy50b3JjaCUyMGltcG9ydCUyMGxvYWRfZmlsZSUwQSUwQWZpbGUlMjAlM0QlMjBoZl9odWJfZG93bmxvYWQoJTIyZG4xMTglMkZ1bmFlc3RoZXRpY1hMJTIyJTJDJTIwZmlsZW5hbWUlM0QlMjJ1bmFlc3RoZXRpY1hMdjMxLnNhZmV0ZW5zb3JzJTIyKSUwQXN0YXRlX2RpY3QlMjAlM0QlMjBsb2FkX2ZpbGUoZmlsZSklMEFzdGF0ZV9kaWN0",
      highlighted: `<span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> hf_hub_download
<span class="hljs-keyword">from</span> safetensors.torch <span class="hljs-keyword">import</span> load_file

file = hf_hub_download(<span class="hljs-string">"dn118/unaestheticXL"</span>, filename=<span class="hljs-string">"unaestheticXLv31.safetensors"</span>)
state_dict = load_file(file)
state_dict`,
    },
  });
  H = new k({
    props: {
      code: "JTdCJ2NsaXBfZyclM0ElMjB0ZW5zb3IoJTVCJTVCJTIwMC4wMDc3JTJDJTIwLTAuMDExMiUyQyUyMCUyMDAuMDA2NSUyQyUyMCUyMC4uLiUyQyUyMCUyMDAuMDE5NSUyQyUyMCUyMDAuMDE1OSUyQyUyMCUyMDAuMDI3NSU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMC4uLiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU1Qi0wLjAxNzAlMkMlMjAlMjAwLjAyMTMlMkMlMjAlMjAwLjAxNDMlMkMlMjAlMjAuLi4lMkMlMjAtMC4wMzAyJTJDJTIwLTAuMDI0MCUyQyUyMC0wLjAzNjIlNUQlNUQlMkMlMEElMjAnY2xpcF9sJyUzQSUyMHRlbnNvciglNUIlNUIlMjAwLjAwMjMlMkMlMjAlMjAwLjAxOTIlMkMlMjAlMjAwLjAyMTMlMkMlMjAlMjAuLi4lMkMlMjAtMC4wMzg1JTJDJTIwJTIwMC4wMDQ4JTJDJTIwLTAuMDAxMSU1RCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMC4uLiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU1QiUyMDAuMDQ3NSUyQyUyMC0wLjA1MDglMkMlMjAtMC4wMTQ1JTJDJTIwJTIwLi4uJTJDJTIwJTIwMC4wMDcwJTJDJTIwLTAuMDA4OSUyQyUyMC0wLjAxNjMlNUQlNUQlMkM=",
      highlighted: `{<span class="hljs-string">'clip_g'</span>: tensor(<span class="hljs-string">[[ 0.0077, -0.0112,  0.0065,  ...,  0.0195,  0.0159,  0.0275],
         ...,
         [-0.0170,  0.0213,  0.0143,  ..., -0.0302, -0.0240, -0.0362]]</span>,
 <span class="hljs-string">'clip_l'</span>: tensor(<span class="hljs-string">[[ 0.0023,  0.0192,  0.0213,  ..., -0.0385,  0.0048, -0.0011],
         ...,
         [ 0.0475, -0.0508, -0.0145,  ...,  0.0070, -0.0089, -0.0163]]</span>,`,
    },
  });
  q = new k({
    props: {
      code: "ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEF1dG9QaXBlbGluZUZvclRleHQySW1hZ2UlMEFpbXBvcnQlMjB0b3JjaCUwQSUwQXBpcGUlMjAlM0QlMjBBdXRvUGlwZWxpbmVGb3JUZXh0MkltYWdlLmZyb21fcHJldHJhaW5lZCglMjJzdGFiaWxpdHlhaSUyRnN0YWJsZS1kaWZmdXNpb24teGwtYmFzZS0xLjAlMjIlMkMlMjB2YXJpYW50JTNEJTIyZnAxNiUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiklMEFwaXBlLnRvKCUyMmN1ZGElMjIpJTBBJTBBcGlwZS5sb2FkX3RleHR1YWxfaW52ZXJzaW9uKHN0YXRlX2RpY3QlNUIlMjJjbGlwX2clMjIlNUQlMkMlMjB0b2tlbiUzRCUyMnVuYWVzdGhldGljWEx2MzElMjIlMkMlMjB0ZXh0X2VuY29kZXIlM0RwaXBlLnRleHRfZW5jb2Rlcl8yJTJDJTIwdG9rZW5pemVyJTNEcGlwZS50b2tlbml6ZXJfMiklMEFwaXBlLmxvYWRfdGV4dHVhbF9pbnZlcnNpb24oc3RhdGVfZGljdCU1QiUyMmNsaXBfbCUyMiU1RCUyQyUyMHRva2VuJTNEJTIydW5hZXN0aGV0aWNYTHYzMSUyMiUyQyUyMHRleHRfZW5jb2RlciUzRHBpcGUudGV4dF9lbmNvZGVyJTJDJTIwdG9rZW5pemVyJTNEcGlwZS50b2tlbml6ZXIpJTBBJTBBJTIzJTIwdGhlJTIwZW1iZWRkaW5nJTIwc2hvdWxkJTIwYmUlMjB1c2VkJTIwYXMlMjBhJTIwbmVnYXRpdmUlMjBlbWJlZGRpbmclMkMlMjBzbyUyMHdlJTIwcGFzcyUyMGl0JTIwYXMlMjBhJTIwbmVnYXRpdmUlMjBwcm9tcHQlMEFnZW5lcmF0b3IlMjAlM0QlMjB0b3JjaC5HZW5lcmF0b3IoKS5tYW51YWxfc2VlZCgzMyklMEFpbWFnZSUyMCUzRCUyMHBpcGUoJTIyYSUyMHdvbWFuJTIwc3RhbmRpbmclMjBpbiUyMGZyb250JTIwb2YlMjBhJTIwbW91bnRhaW4lMjIlMkMlMjBuZWdhdGl2ZV9wcm9tcHQlM0QlMjJ1bmFlc3RoZXRpY1hMdjMxJTIyJTJDJTIwZ2VuZXJhdG9yJTNEZ2VuZXJhdG9yKS5pbWFnZXMlNUIwJTVE",
      highlighted: `<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForText2Image
<span class="hljs-keyword">import</span> torch

pipe = AutoPipelineForText2Image.from_pretrained(<span class="hljs-string">"stabilityai/stable-diffusion-xl-base-1.0"</span>, variant=<span class="hljs-string">"fp16"</span>, torch_dtype=torch.float16)
pipe.to(<span class="hljs-string">"cuda"</span>)

pipe.load_textual_inversion(state_dict[<span class="hljs-string">"clip_g"</span>], token=<span class="hljs-string">"unaestheticXLv31"</span>, text_encoder=pipe.text_encoder_2, tokenizer=pipe.tokenizer_2)
pipe.load_textual_inversion(state_dict[<span class="hljs-string">"clip_l"</span>], token=<span class="hljs-string">"unaestheticXLv31"</span>, text_encoder=pipe.text_encoder, tokenizer=pipe.tokenizer)

<span class="hljs-comment"># the embedding should be used as a negative embedding, so we pass it as a negative prompt</span>
generator = torch.Generator().manual_seed(<span class="hljs-number">33</span>)
image = pipe(<span class="hljs-string">"a woman standing in front of a mountain"</span>, negative_prompt=<span class="hljs-string">"unaestheticXLv31"</span>, generator=generator).images[<span class="hljs-number">0</span>]`,
    },
  });

  // Child components in page order; the intro (`i`) and outro (`o`) hooks visit
  // them in exactly this sequence.
  const children = [S, B, Q, D, G, R, A, Y, z, F, V, H, q];

  return {
    // Create: build every element, spacer and text node from scratch.
    c() {
      v = i("meta");
      ve = c();
      J = i("h1");
      Z = i("a");
      ie = i("span");
      M(S.$$.fragment);
      st = c();
      re = i("span");
      at = a(TEXT.at);
      Je = c();
      M(B.$$.fragment);
      je = c();
      g = i("p");
      nt = a(TEXT.nt);
      P = i("a");
      ot = a(TEXT.ot);
      it = a(TEXT.it);
      E = i("a");
      rt = a(TEXT.rt);
      pt = a(TEXT.pt);
      Te = c();
      C = i("p");
      ct = a(TEXT.ct);
      O = i("a");
      ft = a(TEXT.ft);
      ut = a(TEXT.ut);
      ke = c();
      K = i("p");
      dt = a(TEXT.dt);
      Ze = c();
      M(Q.$$.fragment);
      Ce = c();
      ee = i("p");
      mt = a(TEXT.mt);
      xe = c();
      M(D.$$.fragment);
      We = c();
      j = i("h2");
      x = i("a");
      pe = i("span");
      M(G.$$.fragment);
      Mt = c();
      ce = i("span");
      ht = a(TEXT.ht);
      Xe = c();
      W = i("p");
      yt = a(TEXT.yt);
      N = i("a");
      bt = a(TEXT.bt);
      wt = a(TEXT.wt);
      Ie = c();
      M(R.$$.fragment);
      $e = c();
      te = i("p");
      Ut = a(TEXT.Ut);
      Se = c();
      M(A.$$.fragment);
      Be = c();
      X = i("p");
      gt = a(TEXT.gt);
      fe = i("code");
      _t = a(TEXT._t);
      vt = a(TEXT.vt);
      Ee = c();
      M(Y.$$.fragment);
      Qe = c();
      _ = i("p");
      Jt = a(TEXT.Jt);
      ue = i("code");
      jt = a(TEXT.jt);
      Tt = a(TEXT.Tt);
      de = i("code");
      kt = a(TEXT.kt);
      Zt = a(TEXT.Zt);
      De = c();
      M(z.$$.fragment);
      Ge = c();
      L = i("div");
      me = i("img");
      Ne = c();
      T = i("h2");
      I = i("a");
      Me = i("span");
      M(F.$$.fragment);
      Ct = c();
      he = i("span");
      xt = a(TEXT.xt);
      Re = c();
      le = i("p");
      Wt = a(TEXT.Wt);
      Ae = c();
      se = i("p");
      Xt = a(TEXT.Xt);
      Ye = c();
      M(V.$$.fragment);
      ze = c();
      M(H.$$.fragment);
      Le = c();
      d = i("p");
      It = a(TEXT.It);
      ye = i("code");
      $t = a(TEXT.$t);
      St = a(TEXT.St);
      be = i("code");
      Bt = a(TEXT.Bt);
      Et = a(TEXT.Et);
      we = i("code");
      Qt = a(TEXT.Qt);
      Dt = a(TEXT.Dt);
      Ue = i("code");
      Gt = a(TEXT.Gt);
      Nt = a(TEXT.Nt);
      ge = i("code");
      Rt = a(TEXT.Rt);
      At = a(TEXT.At);
      _e = i("code");
      Yt = a(TEXT.Yt);
      zt = a(TEXT.zt);
      Fe = c();
      $ = i("p");
      Lt = a(TEXT.Lt);
      ae = i("a");
      Ft = a(TEXT.Ft);
      Vt = a(TEXT.Vt);
      Ve = c();
      M(q.$$.fragment);
      this.h();
    },

    // Claim: same node sequence as `c()`, but each node is obtained from the
    // existing children of `e` (presumably the hydration path — confirm against
    // the vendor helpers). Leftover child lists are passed to `t` afterwards.
    l(e) {
      const s = xl('[data-svelte="svelte-1phssyn"]', document.head);
      v = r(s, "META", { name: true, content: true });
      s.forEach(t);
      ve = f(e);
      J = r(e, "H1", { class: true });
      const qe = p(J);
      Z = r(qe, "A", { id: true, class: true, href: true });
      const Ot = p(Z);
      ie = r(Ot, "SPAN", {});
      const Kt = p(ie);
      h(S.$$.fragment, Kt);
      Kt.forEach(t);
      Ot.forEach(t);
      st = f(qe);
      re = r(qe, "SPAN", {});
      const el = p(re);
      at = n(el, TEXT.at);
      el.forEach(t);
      qe.forEach(t);
      Je = f(e);
      h(B.$$.fragment, e);
      je = f(e);
      g = r(e, "P", {});
      const ne = p(g);
      nt = n(ne, TEXT.nt);
      P = r(ne, "A", { href: true });
      const tl = p(P);
      ot = n(tl, TEXT.ot);
      tl.forEach(t);
      it = n(ne, TEXT.it);
      E = r(ne, "A", { href: true, rel: true });
      const ll = p(E);
      rt = n(ll, TEXT.rt);
      ll.forEach(t);
      pt = n(ne, TEXT.pt);
      ne.forEach(t);
      Te = f(e);
      C = r(e, "P", {});
      const Pe = p(C);
      ct = n(Pe, TEXT.ct);
      O = r(Pe, "A", { href: true });
      const sl = p(O);
      ft = n(sl, TEXT.ft);
      sl.forEach(t);
      ut = n(Pe, TEXT.ut);
      Pe.forEach(t);
      ke = f(e);
      K = r(e, "P", {});
      const al = p(K);
      dt = n(al, TEXT.dt);
      al.forEach(t);
      Ze = f(e);
      h(Q.$$.fragment, e);
      Ce = f(e);
      ee = r(e, "P", {});
      const nl = p(ee);
      mt = n(nl, TEXT.mt);
      nl.forEach(t);
      xe = f(e);
      h(D.$$.fragment, e);
      We = f(e);
      j = r(e, "H2", { class: true });
      const Oe = p(j);
      x = r(Oe, "A", { id: true, class: true, href: true });
      const ol = p(x);
      pe = r(ol, "SPAN", {});
      const il = p(pe);
      h(G.$$.fragment, il);
      il.forEach(t);
      ol.forEach(t);
      Mt = f(Oe);
      ce = r(Oe, "SPAN", {});
      const rl = p(ce);
      ht = n(rl, TEXT.ht);
      rl.forEach(t);
      Oe.forEach(t);
      Xe = f(e);
      W = r(e, "P", {});
      const Ke = p(W);
      yt = n(Ke, TEXT.yt);
      N = r(Ke, "A", { href: true, rel: true });
      const pl = p(N);
      bt = n(pl, TEXT.bt);
      pl.forEach(t);
      wt = n(Ke, TEXT.wt);
      Ke.forEach(t);
      Ie = f(e);
      h(R.$$.fragment, e);
      $e = f(e);
      te = r(e, "P", {});
      const cl = p(te);
      Ut = n(cl, TEXT.Ut);
      cl.forEach(t);
      Se = f(e);
      h(A.$$.fragment, e);
      Be = f(e);
      X = r(e, "P", {});
      const et = p(X);
      gt = n(et, TEXT.gt);
      fe = r(et, "CODE", {});
      const fl = p(fe);
      _t = n(fl, TEXT._t);
      fl.forEach(t);
      vt = n(et, TEXT.vt);
      et.forEach(t);
      Ee = f(e);
      h(Y.$$.fragment, e);
      Qe = f(e);
      _ = r(e, "P", {});
      const oe = p(_);
      Jt = n(oe, TEXT.Jt);
      ue = r(oe, "CODE", {});
      const ul = p(ue);
      jt = n(ul, TEXT.jt);
      ul.forEach(t);
      Tt = n(oe, TEXT.Tt);
      de = r(oe, "CODE", {});
      const dl = p(de);
      kt = n(dl, TEXT.kt);
      dl.forEach(t);
      Zt = n(oe, TEXT.Zt);
      oe.forEach(t);
      De = f(e);
      h(z.$$.fragment, e);
      Ge = f(e);
      L = r(e, "DIV", { class: true });
      const ml = p(L);
      me = r(ml, "IMG", { src: true });
      ml.forEach(t);
      Ne = f(e);
      T = r(e, "H2", { class: true });
      const tt = p(T);
      I = r(tt, "A", { id: true, class: true, href: true });
      const Ml = p(I);
      Me = r(Ml, "SPAN", {});
      const hl = p(Me);
      h(F.$$.fragment, hl);
      hl.forEach(t);
      Ml.forEach(t);
      Ct = f(tt);
      he = r(tt, "SPAN", {});
      const yl = p(he);
      xt = n(yl, TEXT.xt);
      yl.forEach(t);
      tt.forEach(t);
      Re = f(e);
      le = r(e, "P", {});
      const bl = p(le);
      Wt = n(bl, TEXT.Wt);
      bl.forEach(t);
      Ae = f(e);
      se = r(e, "P", {});
      const wl = p(se);
      Xt = n(wl, TEXT.Xt);
      wl.forEach(t);
      Ye = f(e);
      h(V.$$.fragment, e);
      ze = f(e);
      h(H.$$.fragment, e);
      Le = f(e);
      d = r(e, "P", {});
      const m = p(d);
      It = n(m, TEXT.It);
      ye = r(m, "CODE", {});
      const Ul = p(ye);
      $t = n(Ul, TEXT.$t);
      Ul.forEach(t);
      St = n(m, TEXT.St);
      be = r(m, "CODE", {});
      const gl = p(be);
      Bt = n(gl, TEXT.Bt);
      gl.forEach(t);
      Et = n(m, TEXT.Et);
      we = r(m, "CODE", {});
      const _l = p(we);
      Qt = n(_l, TEXT.Qt);
      _l.forEach(t);
      Dt = n(m, TEXT.Dt);
      Ue = r(m, "CODE", {});
      const vl = p(Ue);
      Gt = n(vl, TEXT.Gt);
      vl.forEach(t);
      Nt = n(m, TEXT.Nt);
      ge = r(m, "CODE", {});
      const Jl = p(ge);
      Rt = n(Jl, TEXT.Rt);
      Jl.forEach(t);
      At = n(m, TEXT.At);
      _e = r(m, "CODE", {});
      const jl = p(_e);
      Yt = n(jl, TEXT.Yt);
      jl.forEach(t);
      zt = n(m, TEXT.zt);
      m.forEach(t);
      Fe = f(e);
      $ = r(e, "P", {});
      const lt = p($);
      Lt = n(lt, TEXT.Lt);
      ae = r(lt, "A", { href: true });
      const Tl = p(ae);
      Ft = n(Tl, TEXT.Ft);
      Tl.forEach(t);
      Vt = n(lt, TEXT.Vt);
      lt.forEach(t);
      Ve = f(e);
      h(q.$$.fragment, e);
      this.h();
    },

    // Hydrate attributes: ids, classes, hrefs and the image source. Called at
    // the end of both `c()` and `l()`.
    h() {
      u(v, "name", "hf:doc:metadata");
      u(v, "content", JSON.stringify(Bl));
      u(Z, "id", "textual-inversion");
      u(Z, "class", HEADER_LINK_CLASS);
      u(Z, "href", "#textual-inversion");
      u(J, "class", "relative group");
      u(P, "href", "/docs/diffusers/main/en/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusionPipeline");
      u(E, "href", "https://huggingface.co/spaces/sd-concepts-library/stable-diffusion-conceptualizer");
      u(E, "rel", "nofollow");
      u(O, "href", "./training/text_inversion");
      u(x, "id", "stable-diffusion-1-and-2");
      u(x, "class", HEADER_LINK_CLASS);
      u(x, "href", "#stable-diffusion-1-and-2");
      u(j, "class", "relative group");
      u(N, "href", "https://huggingface.co/spaces/sd-concepts-library/stable-diffusion-conceptualizer");
      u(N, "rel", "nofollow");
      Pt = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/textual_inversion_inference.png";
      // Only assign `src` when the helper reports that the current value differs.
      if (!Wl(me.src, Pt)) {
        u(me, "src", Pt);
      }
      u(L, "class", "flex justify-center");
      u(I, "id", "stable-diffusion-xl");
      u(I, "class", HEADER_LINK_CLASS);
      u(I, "href", "#stable-diffusion-xl");
      u(T, "class", "relative group");
      u(ae, "href", "/docs/diffusers/main/en/api/pipelines/stable_diffusion/depth2img#diffusers.StableDiffusionDepth2ImgPipeline.load_textual_inversion");
    },

    // Mount: insert the top-level nodes into `e` before anchor `s`, nest child
    // nodes under their parents, and mount the child components.
    m(e, s) {
      l(document.head, v);
      o(e, ve, s);
      o(e, J, s);
      l(J, Z);
      l(Z, ie);
      y(S, ie, null);
      l(J, st);
      l(J, re);
      l(re, at);
      o(e, Je, s);
      y(B, e, s);
      o(e, je, s);
      o(e, g, s);
      l(g, nt);
      l(g, P);
      l(P, ot);
      l(g, it);
      l(g, E);
      l(E, rt);
      l(g, pt);
      o(e, Te, s);
      o(e, C, s);
      l(C, ct);
      l(C, O);
      l(O, ft);
      l(C, ut);
      o(e, ke, s);
      o(e, K, s);
      l(K, dt);
      o(e, Ze, s);
      y(Q, e, s);
      o(e, Ce, s);
      o(e, ee, s);
      l(ee, mt);
      o(e, xe, s);
      y(D, e, s);
      o(e, We, s);
      o(e, j, s);
      l(j, x);
      l(x, pe);
      y(G, pe, null);
      l(j, Mt);
      l(j, ce);
      l(ce, ht);
      o(e, Xe, s);
      o(e, W, s);
      l(W, yt);
      l(W, N);
      l(N, bt);
      l(W, wt);
      o(e, Ie, s);
      y(R, e, s);
      o(e, $e, s);
      o(e, te, s);
      l(te, Ut);
      o(e, Se, s);
      y(A, e, s);
      o(e, Be, s);
      o(e, X, s);
      l(X, gt);
      l(X, fe);
      l(fe, _t);
      l(X, vt);
      o(e, Ee, s);
      y(Y, e, s);
      o(e, Qe, s);
      o(e, _, s);
      l(_, Jt);
      l(_, ue);
      l(ue, jt);
      l(_, Tt);
      l(_, de);
      l(de, kt);
      l(_, Zt);
      o(e, De, s);
      y(z, e, s);
      o(e, Ge, s);
      o(e, L, s);
      l(L, me);
      o(e, Ne, s);
      o(e, T, s);
      l(T, I);
      l(I, Me);
      y(F, Me, null);
      l(T, Ct);
      l(T, he);
      l(he, xt);
      o(e, Re, s);
      o(e, le, s);
      l(le, Wt);
      o(e, Ae, s);
      o(e, se, s);
      l(se, Xt);
      o(e, Ye, s);
      y(V, e, s);
      o(e, ze, s);
      y(H, e, s);
      o(e, Le, s);
      o(e, d, s);
      l(d, It);
      l(d, ye);
      l(ye, $t);
      l(d, St);
      l(d, be);
      l(be, Bt);
      l(d, Et);
      l(d, we);
      l(we, Qt);
      l(d, Dt);
      l(d, Ue);
      l(Ue, Gt);
      l(d, Nt);
      l(d, ge);
      l(ge, Rt);
      l(d, At);
      l(d, _e);
      l(_e, Yt);
      l(d, zt);
      o(e, Fe, s);
      o(e, $, s);
      l($, Lt);
      l($, ae);
      l(ae, Ft);
      l($, Vt);
      o(e, Ve, s);
      y(q, e, s);
      He = true;
    },

    // Update: this page has no reactive state, so the shared no-op is used.
    p: Xl,

    // Intro: run once per mount cycle, guarded by the `He` flag.
    i(e) {
      if (He) {
        return;
      }
      for (const child of children) {
        b(child.$$.fragment, e);
      }
      He = true;
    },

    // Outro: mirror of `i`; clears the flag so a later intro runs again.
    o(e) {
      for (const child of children) {
        w(child.$$.fragment, e);
      }
      He = false;
    },

    // Destroy: the head <meta> is always removed; body nodes are removed only
    // when `e` is truthy. Components nested inside headings (S, G, F) are
    // destroyed without the flag, exactly as in the generated code.
    d(e) {
      t(v);
      if (e) t(ve);
      if (e) t(J);
      U(S);
      if (e) t(Je);
      U(B, e);
      if (e) t(je);
      if (e) t(g);
      if (e) t(Te);
      if (e) t(C);
      if (e) t(ke);
      if (e) t(K);
      if (e) t(Ze);
      U(Q, e);
      if (e) t(Ce);
      if (e) t(ee);
      if (e) t(xe);
      U(D, e);
      if (e) t(We);
      if (e) t(j);
      U(G);
      if (e) t(Xe);
      if (e) t(W);
      if (e) t(Ie);
      U(R, e);
      if (e) t($e);
      if (e) t(te);
      if (e) t(Se);
      U(A, e);
      if (e) t(Be);
      if (e) t(X);
      if (e) t(Ee);
      U(Y, e);
      if (e) t(Qe);
      if (e) t(_);
      if (e) t(De);
      U(z, e);
      if (e) t(Ge);
      if (e) t(L);
      if (e) t(Ne);
      if (e) t(T);
      U(F);
      if (e) t(Re);
      if (e) t(le);
      if (e) t(Ae);
      if (e) t(se);
      if (e) t(Ye);
      U(V, e);
      if (e) t(ze);
      U(H, e);
      if (e) t(Le);
      if (e) t(d);
      if (e) t(Fe);
      if (e) t($);
      if (e) t(Ve);
      U(q, e);
    },
  };
}

// Page metadata: serialized into the `hf:doc:metadata` <meta> tag by `h()` and
// exported as `metadata`.
const Bl = {
  local: "textual-inversion",
  sections: [
    { local: "stable-diffusion-1-and-2", title: "Stable Diffusion 1 and 2" },
    { local: "stable-diffusion-xl", title: "Stable Diffusion XL" },
  ],
  title: "Textual inversion",
};

/**
 * Instance initializer passed to the component constructor helper.
 *
 * Registers a callback with `Il` (presumably Svelte's `onMount` — confirm in the
 * vendor chunk) that reads the `fw` query parameter and discards the value; the
 * read is kept so behavior matches the generated code.
 *
 * @param {*} qt - Unused.
 * @returns {Array} an empty array (the component exposes no instance state).
 */
function El(qt) {
  Il(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component: wires `El` (instance) and `Sl` (fragment) into the base class. */
class Rl extends kl {
  constructor(v) {
    super();
    Zl(this, v, El, Sl, Cl, {});
  }
}

export { Rl as default, Bl as metadata };
Xet Storage Details
- Size:
- 22.8 kB
- Xet hash:
- c46e8f8b67b367ce789f467fa085115edf0477c66cfcc03d15bc1b0e617efe85
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.