Buckets:

rtrm's picture
download
raw
42.5 kB
import{s as vt,o as Ht,n as We}from"../chunks/scheduler.e4ff9b64.js";import{S as It,i as Xt,e as u,s as i,c as U,h as Bt,a as b,d as t,b as r,f as Vt,g as J,j as _,k as je,l as Nt,m as l,n as h,t as $,o as T,p as w}from"../chunks/index.09f1bca0.js";import{C as Qt,H as Ze,E as Ft}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.f5199cd9.js";import{C as G}from"../chunks/CodeBlock.1680a1fd.js";import{H as ct,a as ge}from"../chunks/HfOption.44827c7f.js";function Et(C){let a,M=`<a href="https://github.com/huggingface/diffusers/blob/6e68c71503682c8693cb5b06a4da4911dfd655ee/examples/kandinsky2_2/text_to_image/train_text_to_image_prior.py#L441" rel="nofollow"><code>main()</code></a> 函数包含代码 f
或准备数据集和训练模型。`,p,f,n="您会立即注意到的主要区别之一是,训练脚本除了调度器和分词器外,还加载了一个 <code>CLIPImageProcessor</code> 用于预处理图像,以及一个 <code>CLIPVisionModelWithProjection</code> 模型用于编码图像:",o,y,d,j,E="Kandinsky 使用一个 <code>PriorTransformer</code> 来生成图像嵌入,因此您需要设置优化器来学习先验模型的参数。",X,W,N,k,B='接下来,输入标题被分词,图像由 <code>CLIPImageProcessor</code> <a href="https://github.com/huggingface/diffusers/blob/6e68c71503682c8693cb5b06a4da4911dfd655ee/examples/kandinsky2_2/text_to_image/train_text_to_image_prior.py#L632" rel="nofollow">预处理</a>:',Q,R,V,v,H='最后,<a href="https://github.com/huggingface/diffusers/blob/6e68c71503682c8693cb5b06a4da4911dfd655ee/examples/kandinsky2_2/text_to_image/train_text_to_image_prior.py#L718" rel="nofollow">训练循环</a> 将输入图像转换为潜在表示,向图像嵌入添加噪声,并进行预测:',x,c,Z,I,P='如果您想了解更多关于训练循环的工作原理,请查看 <a href="../using-diffusers/write_own_pipeline">理解管道、模型和调度器</a> 教程,该教程分解了去噪过程的基本模式。',F;return y=new G({props:{code:"bm9pc2Vfc2NoZWR1bGVyJTIwJTNEJTIwRERQTVNjaGVkdWxlcihiZXRhX3NjaGVkdWxlJTNEJTIyc3F1YXJlZGNvc19jYXBfdjIlMjIlMkMlMjBwcmVkaWN0aW9uX3R5cGUlM0QlMjJzYW1wbGUlMjIpJTBBaW1hZ2VfcHJvY2Vzc29yJTIwJTNEJTIwQ0xJUEltYWdlUHJvY2Vzc29yLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjBhcmdzLnByZXRyYWluZWRfcHJpb3JfbW9kZWxfbmFtZV9vcl9wYXRoJTJDJTIwc3ViZm9sZGVyJTNEJTIyaW1hZ2VfcHJvY2Vzc29yJTIyJTBBKSUwQXRva2VuaXplciUyMCUzRCUyMENMSVBUb2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKGFyZ3MucHJldHJhaW5lZF9wcmlvcl9tb2RlbF9uYW1lX29yX3BhdGglMkMlMjBzdWJmb2xkZXIlM0QlMjJ0b2tlbml6ZXIlMjIpJTBBJTBBd2l0aCUyMENvbnRleHRNYW5hZ2VycyhkZWVwc3BlZWRfemVyb19pbml0X2Rpc2FibGVkX2NvbnRleHRfbWFuYWdlcigpKSUzQSUwQSUyMCUyMCUyMCUyMGltYWdlX2VuY29kZXIlMjAlM0QlMjBDTElQVmlzaW9uTW9kZWxXaXRoUHJvamVjdGlvbi5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwYXJncy5wcmV0cmFpbmVkX3ByaW9yX21vZGVsX25hbWVfb3JfcGF0aCUyQyUyMHN1YmZvbGRlciUzRCUyMmltYWdlX2VuY29kZXIlMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHdlaWdodF9kdHlwZSUwQSUyMCUyMCUyMCUyMCkuZXZhbCgpJTBBJTIwJTIwJTIwJTIwdGV4dF9lbmNvZGVyJTIwJTNEJTIwQ0xJUFRleHRNb2RlbFdpdGhQcm9qZWN0aW9uLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBhcmdzLnByZXRyY
WluZWRfcHJpb3JfbW9kZWxfbmFtZV9vcl9wYXRoJTJDJTIwc3ViZm9sZGVyJTNEJTIydGV4dF9lbmNvZGVyJTIyJTJDJTIwdG9yY2hfZHR5cGUlM0R3ZWlnaHRfZHR5cGUlMEElMjAlMjAlMjAlMjApLmV2YWwoKQ==",highlighted:`noise_scheduler = DDPMScheduler(beta_schedule=<span class="hljs-string">&quot;squaredcos_cap_v2&quot;</span>, prediction_type=<span class="hljs-string">&quot;sample&quot;</span>)
image_processor = CLIPImageProcessor.from_pretrained(
args.pretrained_prior_model_name_or_path, subfolder=<span class="hljs-string">&quot;image_processor&quot;</span>
)
tokenizer = CLIPTokenizer.from_pretrained(args.pretrained_prior_model_name_or_path, subfolder=<span class="hljs-string">&quot;tokenizer&quot;</span>)
<span class="hljs-keyword">with</span> ContextManagers(deepspeed_zero_init_disabled_context_manager()):
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
args.pretrained_prior_model_name_or_path, subfolder=<span class="hljs-string">&quot;image_encoder&quot;</span>, torch_dtype=weight_dtype
).<span class="hljs-built_in">eval</span>()
text_encoder = CLIPTextModelWithProjection.from_pretrained(
args.pretrained_prior_model_name_or_path, subfolder=<span class="hljs-string">&quot;text_encoder&quot;</span>, torch_dtype=weight_dtype
).<span class="hljs-built_in">eval</span>()`,wrap:!1}}),W=new G({props:{code:"cHJpb3IlMjAlM0QlMjBQcmlvclRyYW5zZm9ybWVyLmZyb21fcHJldHJhaW5lZChhcmdzLnByZXRyYWluZWRfcHJpb3JfbW9kZWxfbmFtZV9vcl9wYXRoJTJDJTIwc3ViZm9sZGVyJTNEJTIycHJpb3IlMjIpJTBBcHJpb3IudHJhaW4oKSUwQW9wdGltaXplciUyMCUzRCUyMG9wdGltaXplcl9jbHMoJTBBJTIwJTIwJTIwJTIwcHJpb3IucGFyYW1ldGVycygpJTJDJTBBJTIwJTIwJTIwJTIwbHIlM0RhcmdzLmxlYXJuaW5nX3JhdGUlMkMlMEElMjAlMjAlMjAlMjBiZXRhcyUzRChhcmdzLmFkYW1fYmV0YTElMkMlMjBhcmdzLmFkYW1fYmV0YTIpJTJDJTBBJTIwJTIwJTIwJTIwd2VpZ2h0X2RlY2F5JTNEYXJncy5hZGFtX3dlaWdodF9kZWNheSUyQyUwQSUyMCUyMCUyMCUyMGVwcyUzRGFyZ3MuYWRhbV9lcHNpbG9uJTJDJTBBKQ==",highlighted:`prior = PriorTransformer.from_pretrained(args.pretrained_prior_model_name_or_path, subfolder=<span class="hljs-string">&quot;prior&quot;</span>)
prior.train()
optimizer = optimizer_cls(
prior.parameters(),
lr=args.learning_rate,
betas=(args.adam_beta1, args.adam_beta2),
weight_decay=args.adam_weight_decay,
eps=args.adam_epsilon,
)`,wrap:!1}}),R=new G({props:{code:"ZGVmJTIwcHJlcHJvY2Vzc190cmFpbihleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjBpbWFnZXMlMjAlM0QlMjAlNUJpbWFnZS5jb252ZXJ0KCUyMlJHQiUyMiklMjBmb3IlMjBpbWFnZSUyMGluJTIwZXhhbXBsZXMlNUJpbWFnZV9jb2x1bW4lNUQlNUQlMEElMjAlMjAlMjAlMjBleGFtcGxlcyU1QiUyMmNsaXBfcGl4ZWxfdmFsdWVzJTIyJTVEJTIwJTNEJTIwaW1hZ2VfcHJvY2Vzc29yKGltYWdlcyUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpLnBpeGVsX3ZhbHVlcyUwQSUyMCUyMCUyMCUyMGV4YW1wbGVzJTVCJTIydGV4dF9pbnB1dF9pZHMlMjIlNUQlMkMlMjBleGFtcGxlcyU1QiUyMnRleHRfbWFzayUyMiU1RCUyMCUzRCUyMHRva2VuaXplX2NhcHRpb25zKGV4YW1wbGVzKSUwQSUyMCUyMCUyMCUyMHJldHVybiUyMGV4YW1wbGVz",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_train</span>(<span class="hljs-params">examples</span>):
images = [image.convert(<span class="hljs-string">&quot;RGB&quot;</span>) <span class="hljs-keyword">for</span> image <span class="hljs-keyword">in</span> examples[image_column]]
examples[<span class="hljs-string">&quot;clip_pixel_values&quot;</span>] = image_processor(images, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>).pixel_values
examples[<span class="hljs-string">&quot;text_input_ids&quot;</span>], examples[<span class="hljs-string">&quot;text_mask&quot;</span>] = tokenize_captions(examples)
<span class="hljs-keyword">return</span> examples`,wrap:!1}}),c=new G({props:{code:"bW9kZWxfcHJlZCUyMCUzRCUyMHByaW9yKCUwQSUyMCUyMCUyMCUyMG5vaXN5X2xhdGVudHMlMkMlMEElMjAlMjAlMjAlMjB0aW1lc3RlcCUzRHRpbWVzdGVwcyUyQyUwQSUyMCUyMCUyMCUyMHByb2pfZW1iZWRkaW5nJTNEcHJvbXB0X2VtYmVkcyUyQyUwQSUyMCUyMCUyMCUyMGVuY29kZXJfaGlkZGVuX3N0YXRlcyUzRHRleHRfZW5jb2Rlcl9oaWRkZW5fc3RhdGVzJTJDJTBBJTIwJTIwJTIwJTIwYXR0ZW50aW9uX21hc2slM0R0ZXh0X21hc2slMkMlMEEpLnByZWRpY3RlZF9pbWFnZV9lbWJlZGRpbmc=",highlighted:`model_pred = prior(
noisy_latents,
timestep=timesteps,
proj_embedding=prompt_embeds,
encoder_hidden_states=text_encoder_hidden_states,
attention_mask=text_mask,
).predicted_image_embedding`,wrap:!1}}),{c(){a=u("p"),a.innerHTML=M,p=i(),f=u("p"),f.innerHTML=n,o=i(),U(y.$$.fragment),d=i(),j=u("p"),j.innerHTML=E,X=i(),U(W.$$.fragment),N=i(),k=u("p"),k.innerHTML=B,Q=i(),U(R.$$.fragment),V=i(),v=u("p"),v.innerHTML=H,x=i(),U(c.$$.fragment),Z=i(),I=u("p"),I.innerHTML=P},l(m){a=b(m,"P",{"data-svelte-h":!0}),_(a)!=="svelte-1x90g38"&&(a.innerHTML=M),p=r(m),f=b(m,"P",{"data-svelte-h":!0}),_(f)!=="svelte-l9ezec"&&(f.innerHTML=n),o=r(m),J(y.$$.fragment,m),d=r(m),j=b(m,"P",{"data-svelte-h":!0}),_(j)!=="svelte-120gd1l"&&(j.innerHTML=E),X=r(m),J(W.$$.fragment,m),N=r(m),k=b(m,"P",{"data-svelte-h":!0}),_(k)!=="svelte-o1316t"&&(k.innerHTML=B),Q=r(m),J(R.$$.fragment,m),V=r(m),v=b(m,"P",{"data-svelte-h":!0}),_(v)!=="svelte-25wyru"&&(v.innerHTML=H),x=r(m),J(c.$$.fragment,m),Z=r(m),I=b(m,"P",{"data-svelte-h":!0}),_(I)!=="svelte-t0nra5"&&(I.innerHTML=P)},m(m,g){l(m,a,g),l(m,p,g),l(m,f,g),l(m,o,g),h(y,m,g),l(m,d,g),l(m,j,g),l(m,X,g),h(W,m,g),l(m,N,g),l(m,k,g),l(m,Q,g),h(R,m,g),l(m,V,g),l(m,v,g),l(m,x,g),h(c,m,g),l(m,Z,g),l(m,I,g),F=!0},p:We,i(m){F||($(y.$$.fragment,m),$(W.$$.fragment,m),$(R.$$.fragment,m),$(c.$$.fragment,m),F=!0)},o(m){T(y.$$.fragment,m),T(W.$$.fragment,m),T(R.$$.fragment,m),T(c.$$.fragment,m),F=!1},d(m){m&&(t(a),t(p),t(f),t(o),t(d),t(j),t(X),t(N),t(k),t(Q),t(V),t(v),t(x),t(Z),t(I)),w(y,m),w(W,m),w(R,m),w(c,m)}}}function Yt(C){let a,M=`The [<code>main()</code>](<a href="https://github.com/huggingface/di" rel="nofollow">https://github.com/huggingface/di</a>
ffusers/blob/6e68c71503682c8693cb5b06a4da4911dfd655ee/examples/kandinsky2_2/text_to_image/train_text_to_image_decoder.py#L440) 函数包含准备数据集和训练模型的代码。`,p,f,n="与之前的模型不同,解码器初始化一个 <code>VQModel</code> 来将潜在变量解码为图像,并使用一个 <code>UNet2DConditionModel</code>:",o,y,d,j,E='接下来,脚本包括几个图像变换和一个用于对图像应用变换并返回像素值的<a href="https://github.com/huggingface/diffusers/blob/6e68c71503682c8693cb5b06a4da4911dfd655ee/examples/kandinsky2_2/text_to_image/train_text_to_image_decoder.py#L622" rel="nofollow">预处理</a>函数:',X,W,N,k,B='最后,<a href="https://github.com/huggingface/diffusers/blob/6e68c71503682c8693cb5b06a4da4911dfd655ee/examples/kandinsky2_2/text_to_image/train_text_to_image_decoder.py#L706" rel="nofollow">训练循环</a>处理将图像转换为潜在变量、添加噪声和预测噪声残差。',Q,R,V='如果您想了解更多关于训练循环如何工作的信息,请查看<a href="../using-diffusers/write_own_pipeline">理解管道、模型和调度器</a>教程,该教程分解了去噪过程的基本模式。',v,H,x;return y=new G({props:{code:"d2l0aCUyMENvbnRleHRNYW5hZ2VycyhkZWVwc3BlZWRfemVyb19pbml0X2Rpc2FibGVkX2NvbnRleHRfbWFuYWdlcigpKSUzQSUwQSUyMCUyMCUyMCUyMHZhZSUyMCUzRCUyMFZRTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGFyZ3MucHJldHJhaW5lZF9kZWNvZGVyX21vZGVsX25hbWVfb3JfcGF0aCUyQyUyMHN1YmZvbGRlciUzRCUyMm1vdnElMjIlMkMlMjB0b3JjaF9kdHlwZSUzRHdlaWdodF9kdHlwZSUwQSUyMCUyMCUyMCUyMCkuZXZhbCgpJTBBJTIwJTIwJTIwJTIwaW1hZ2VfZW5jb2RlciUyMCUzRCUyMENMSVBWaXNpb25Nb2RlbFdpdGhQcm9qZWN0aW9uLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBhcmdzLnByZXRyYWluZWRfcHJpb3JfbW9kZWxfbmFtZV9vcl9wYXRoJTJDJTIwc3ViZm9sZGVyJTNEJTIyaW1hZ2VfZW5jb2RlciUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEd2VpZ2h0X2R0eXBlJTBBJTIwJTIwJTIwJTIwKS5ldmFsKCklMEF1bmV0JTIwJTNEJTIwVU5ldDJEQ29uZGl0aW9uTW9kZWwuZnJvbV9wcmV0cmFpbmVkKGFyZ3MucHJldHJhaW5lZF9kZWNvZGVyX21vZGVsX25hbWVfb3JfcGF0aCUyQyUyMHN1YmZvbGRlciUzRCUyMnVuZXQlMjIp",highlighted:`<span class="hljs-keyword">with</span> ContextManagers(deepspeed_zero_init_disabled_context_manager()):
vae = VQModel.from_pretrained(
args.pretrained_decoder_model_name_or_path, subfolder=<span class="hljs-string">&quot;movq&quot;</span>, torch_dtype=weight_dtype
).<span class="hljs-built_in">eval</span>()
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
args.pretrained_prior_model_name_or_path, subfolder=<span class="hljs-string">&quot;image_encoder&quot;</span>, torch_dtype=weight_dtype
).<span class="hljs-built_in">eval</span>()
unet = UNet2DConditionModel.from_pretrained(args.pretrained_decoder_model_name_or_path, subfolder=<span class="hljs-string">&quot;unet&quot;</span>)`,wrap:!1}}),W=new G({props:{code:"ZGVmJTIwcHJlcHJvY2Vzc190cmFpbihleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjBpbWFnZXMlMjAlM0QlMjAlNUJpbWFnZS5jb252ZXJ0KCUyMlJHQiUyMiklMjBmb3IlMjBpbWFnZSUyMGluJTIwZXhhbXBsZXMlNUJpbWFnZV9jb2x1bW4lNUQlNUQlMEElMjAlMjAlMjAlMjBleGFtcGxlcyU1QiUyMnBpeGVsX3ZhbHVlcyUyMiU1RCUyMCUzRCUyMCU1QnRyYWluX3RyYW5zZm9ybXMoaW1hZ2UpJTIwZm9yJTIwaW1hZ2UlMjBpbiUyMGltYWdlcyU1RCUwQSUyMCUyMCUyMCUyMGV4YW1wbGVzJTVCJTIyY2xpcF9waXhlbF92YWx1ZXMlMjIlNUQlMjAlM0QlMjBpbWFnZV9wcm9jZXNzb3IoaW1hZ2VzJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMikucGl4ZWxfdmFsdWVzJTBBJTIwJTIwJTIwJTIwcmV0dXJuJTIwZXhhbXBsZXM=",highlighted:`<span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_train</span>(<span class="hljs-params">examples</span>):
images = [image.convert(<span class="hljs-string">&quot;RGB&quot;</span>) <span class="hljs-keyword">for</span> image <span class="hljs-keyword">in</span> examples[image_column]]
examples[<span class="hljs-string">&quot;pixel_values&quot;</span>] = [train_transforms(image) <span class="hljs-keyword">for</span> image <span class="hljs-keyword">in</span> images]
examples[<span class="hljs-string">&quot;clip_pixel_values&quot;</span>] = image_processor(images, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>).pixel_values
<span class="hljs-keyword">return</span> examples`,wrap:!1}}),H=new G({props:{code:"bW9kZWxfcHJlZCUyMCUzRCUyMHVuZXQobm9pc3lfbGF0ZW50cyUyQyUyMHRpbWVzdGVwcyUyQyUyME5vbmUlMkMlMjBhZGRlZF9jb25kX2t3YXJncyUzRGFkZGVkX2NvbmRfa3dhcmdzKS5zYW1wbGUlNUIlM0ElMkMlMjAlM0E0JTVE",highlighted:'model_pred = unet(noisy_latents, timesteps, <span class="hljs-literal">None</span>, added_cond_kwargs=added_cond_kwargs).sample[:, :<span class="hljs-number">4</span>]',wrap:!1}}),{c(){a=u("p"),a.innerHTML=M,p=i(),f=u("p"),f.innerHTML=n,o=i(),U(y.$$.fragment),d=i(),j=u("p"),j.innerHTML=E,X=i(),U(W.$$.fragment),N=i(),k=u("p"),k.innerHTML=B,Q=i(),R=u("p"),R.innerHTML=V,v=i(),U(H.$$.fragment)},l(c){a=b(c,"P",{"data-svelte-h":!0}),_(a)!=="svelte-uuvj6j"&&(a.innerHTML=M),p=r(c),f=b(c,"P",{"data-svelte-h":!0}),_(f)!=="svelte-1o46svh"&&(f.innerHTML=n),o=r(c),J(y.$$.fragment,c),d=r(c),j=b(c,"P",{"data-svelte-h":!0}),_(j)!=="svelte-69asr1"&&(j.innerHTML=E),X=r(c),J(W.$$.fragment,c),N=r(c),k=b(c,"P",{"data-svelte-h":!0}),_(k)!=="svelte-1o16k7c"&&(k.innerHTML=B),Q=r(c),R=b(c,"P",{"data-svelte-h":!0}),_(R)!=="svelte-1rwgka7"&&(R.innerHTML=V),v=r(c),J(H.$$.fragment,c)},m(c,Z){l(c,a,Z),l(c,p,Z),l(c,f,Z),l(c,o,Z),h(y,c,Z),l(c,d,Z),l(c,j,Z),l(c,X,Z),h(W,c,Z),l(c,N,Z),l(c,k,Z),l(c,Q,Z),l(c,R,Z),l(c,v,Z),h(H,c,Z),x=!0},p:We,i(c){x||($(y.$$.fragment,c),$(W.$$.fragment,c),$(H.$$.fragment,c),x=!0)},o(c){T(y.$$.fragment,c),T(W.$$.fragment,c),T(H.$$.fragment,c),x=!1},d(c){c&&(t(a),t(p),t(f),t(o),t(d),t(j),t(X),t(N),t(k),t(Q),t(R),t(v)),w(y,c),w(W,c),w(H,c)}}}function At(C){let a,M,p,f;return a=new ge({props:{id:"script",option:"prior model",$$slots:{default:[Et]},$$scope:{ctx:C}}}),p=new ge({props:{id:"script",option:"decoder model",$$slots:{default:[Yt]},$$scope:{ctx:C}}}),{c(){U(a.$$.fragment),M=i(),U(p.$$.fragment)},l(n){J(a.$$.fragment,n),M=r(n),J(p.$$.fragment,n)},m(n,o){h(a,n,o),l(n,M,o),h(p,n,o),f=!0},p(n,o){const y={};o&2&&(y.$$scope={dirty:o,ctx:n}),a.$set(y);const 
d={};o&2&&(d.$$scope={dirty:o,ctx:n}),p.$set(d)},i(n){f||($(a.$$.fragment,n),$(p.$$.fragment,n),f=!0)},o(n){T(a.$$.fragment,n),T(p.$$.fragment,n),f=!1},d(n){n&&t(M),w(a,n),w(p,n)}}}function Lt(C){let a,M;return a=new G({props:{code:"ZXhwb3J0JTIwREFUQVNFVF9OQU1FJTNEJTIybGFtYmRhbGFicyUyRm5hcnV0by1ibGlwLWNhcHRpb25zJTIyJTBBJTBBYWNjZWxlcmF0ZSUyMGxhdW5jaCUyMC0tbWl4ZWRfcHJlY2lzaW9uJTNEJTIyZnAxNiUyMiUyMCUyMHRyYWluX3RleHRfdG9faW1hZ2VfcHJpb3IucHklMjAlNUMlMEElMjAlMjAtLWRhdGFzZXRfbmFtZSUzRCUyNERBVEFTRVRfTkFNRSUyMCU1QyUwQSUyMCUyMC0tcmVzb2x1dGlvbiUzRDc2OCUyMCU1QyUwQSUyMCUyMC0tdHJhaW5fYmF0Y2hfc2l6ZSUzRDElMjAlNUMlMEElMjAlMjAtLWdyYWRpZW50X2FjY3VtdWxhdGlvbl9zdGVwcyUzRDQlMjAlNUMlMEElMjAlMjAtLW1heF90cmFpbl9zdGVwcyUzRDE1MDAwJTIwJTVDJTBBJTIwJTIwLS1sZWFybmluZ19yYXRlJTNEMWUtMDUlMjAlNUMlMEElMjAlMjAtLW1heF9ncmFkX25vcm0lM0QxJTIwJTVDJTBBJTIwJTIwLS1jaGVja3BvaW50c190b3RhbF9saW1pdCUzRDMlMjAlNUMlMEElMjAlMjAtLWxyX3NjaGVkdWxlciUzRCUyMmNvbnN0YW50JTIyJTIwJTVDJTBBJTIwJTIwLS1scl93YXJtdXBfc3RlcHMlM0QwJTIwJTVDJTBBJTIwJTIwLS12YWxpZGF0aW9uX3Byb21wdHMlM0QlMjJBJTIwcm9ib3QlMjBuYXJ1dG8lMkMlMjA0ayUyMHBob3RvJTIyJTIwJTVDJTBBJTIwJTIwLS1yZXBvcnRfdG8lM0QlMjJ3YW5kYiUyMiUyMCU1QyUwQSUyMCUyMC0tcHVzaF90b19odWIlMjAlNUMlMEElMjAlMjAtLW91dHB1dF9kaXIlM0QlMjJrYW5kaTItcHJpb3ItbmFydXRvLW1vZGVsJTIy",highlighted:`<span class="hljs-built_in">export</span> DATASET_NAME=<span class="hljs-string">&quot;lambdalabs/naruto-blip-captions&quot;</span>
accelerate launch --mixed_precision=<span class="hljs-string">&quot;fp16&quot;</span> train_text_to_image_prior.py \\
--dataset_name=<span class="hljs-variable">$DATASET_NAME</span> \\
--resolution=768 \\
--train_batch_size=1 \\
--gradient_accumulation_steps=4 \\
--max_train_steps=15000 \\
--learning_rate=1e-05 \\
--max_grad_norm=1 \\
--checkpoints_total_limit=3 \\
--lr_scheduler=<span class="hljs-string">&quot;constant&quot;</span> \\
--lr_warmup_steps=0 \\
--validation_prompts=<span class="hljs-string">&quot;A robot naruto, 4k photo&quot;</span> \\
--report_to=<span class="hljs-string">&quot;wandb&quot;</span> \\
--push_to_hub \\
--output_dir=<span class="hljs-string">&quot;kandi2-prior-naruto-model&quot;</span>`,wrap:!1}}),{c(){U(a.$$.fragment)},l(p){J(a.$$.fragment,p)},m(p,f){h(a,p,f),M=!0},p:We,i(p){M||($(a.$$.fragment,p),M=!0)},o(p){T(a.$$.fragment,p),M=!1},d(p){w(a,p)}}}function zt(C){let a,M;return a=new G({props:{code:"ZXhwb3J0JTIwREFUQVNFVF9OQU1FJTNEJTIybGFtYmRhbGFicyUyRm5hcnV0by1ibGlwLWNhcHRpb25zJTIyJTBBJTBBYWNjZWxlcmF0ZSUyMGxhdW5jaCUyMC0tbWl4ZWRfcHJlY2lzaW9uJTNEJTIyZnAxNiUyMiUyMCUyMHRyYWluX3RleHRfdG9faW1hZ2VfZGVjb2Rlci5weSUyMCU1QyUwQSUyMCUyMC0tZGF0YXNldF9uYW1lJTNEJTI0REFUQVNFVF9OQU1FJTIwJTVDJTBBJTIwJTIwLS1yZXNvbHV0aW9uJTNENzY4JTIwJTVDJTBBJTIwJTIwLS10cmFpbl9iYXRjaF9zaXplJTNEMSUyMCU1QyUwQSUyMCUyMC0tZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTNENCUyMCU1QyUwQSUyMCUyMC0tZ3JhZGllbnRfY2hlY2twb2ludGluZyUyMCU1QyUwQSUyMCUyMC0tbWF4X3RyYWluX3N0ZXBzJTNEMTUwMDAlMjAlNUMlMEElMjAlMjAtLWxlYXJuaW5nX3JhdGUlM0QxZS0wNSUyMCU1QyUwQSUyMCUyMC0tbWF4X2dyYWRfbm9ybSUzRDElMjAlNUMlMEElMjAlMjAtLWNoZWNrcG9pbnRzX3RvdGFsX2xpbWl0JTNEMyUyMCU1QyUwQSUyMCUyMC0tbHJfc2NoZWR1bGVyJTNEJTIyY29uc3RhbnQlMjIlMjAlNUMlMEElMjAlMjAtLWxyX3dhcm11cF9zdGVwcyUzRDAlMjAlNUMlMEElMjAlMjAtLXZhbGlkYXRpb25fcHJvbXB0cyUzRCUyMkElMjByb2JvdCUyMG5hcnV0byUyQyUyMDRrJTIwcGhvdG8lMjIlMjAlNUMlMEElMjAlMjAtLXJlcG9ydF90byUzRCUyMndhbmRiJTIyJTIwJTVDJTBBJTIwJTIwLS1wdXNoX3RvX2h1YiUyMCU1QyUwQSUyMCUyMC0tb3V0cHV0X2RpciUzRCUyMmthbmRpMi1kZWNvZGVyLW5hcnV0by1tb2RlbCUyMg==",highlighted:`<span class="hljs-built_in">export</span> DATASET_NAME=<span class="hljs-string">&quot;lambdalabs/naruto-blip-captions&quot;</span>
accelerate launch --mixed_precision=<span class="hljs-string">&quot;fp16&quot;</span> train_text_to_image_decoder.py \\
--dataset_name=<span class="hljs-variable">$DATASET_NAME</span> \\
--resolution=768 \\
--train_batch_size=1 \\
--gradient_accumulation_steps=4 \\
--gradient_checkpointing \\
--max_train_steps=15000 \\
--learning_rate=1e-05 \\
--max_grad_norm=1 \\
--checkpoints_total_limit=3 \\
--lr_scheduler=<span class="hljs-string">&quot;constant&quot;</span> \\
--lr_warmup_steps=0 \\
--validation_prompts=<span class="hljs-string">&quot;A robot naruto, 4k photo&quot;</span> \\
--report_to=<span class="hljs-string">&quot;wandb&quot;</span> \\
--push_to_hub \\
--output_dir=<span class="hljs-string">&quot;kandi2-decoder-naruto-model&quot;</span>`,wrap:!1}}),{c(){U(a.$$.fragment)},l(p){J(a.$$.fragment,p)},m(p,f){h(a,p,f),M=!0},p:We,i(p){M||($(a.$$.fragment,p),M=!0)},o(p){T(a.$$.fragment,p),M=!1},d(p){w(a,p)}}}function qt(C){let a,M,p,f;return a=new ge({props:{id:"training-inference",option:"prior model",$$slots:{default:[Lt]},$$scope:{ctx:C}}}),p=new ge({props:{id:"training-inference",option:"decoder model",$$slots:{default:[zt]},$$scope:{ctx:C}}}),{c(){U(a.$$.fragment),M=i(),U(p.$$.fragment)},l(n){J(a.$$.fragment,n),M=r(n),J(p.$$.fragment,n)},m(n,o){h(a,n,o),l(n,M,o),h(p,n,o),f=!0},p(n,o){const y={};o&2&&(y.$$scope={dirty:o,ctx:n}),a.$set(y);const d={};o&2&&(d.$$scope={dirty:o,ctx:n}),p.$set(d)},i(n){f||($(a.$$.fragment,n),$(p.$$.fragment,n),f=!0)},o(n){T(a.$$.fragment,n),T(p.$$.fragment,n),f=!1},d(n){n&&t(M),w(a,n),w(p,n)}}}function St(C){let a,M,p,f="<p>可以随意将 <code>kandinsky-community/kandinsky-2-2-decoder</code> 替换为您自己训练的 decoder 检查点!</p>",n;return a=new G({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEF1dG9QaXBlbGluZUZvclRleHQySW1hZ2UlMkMlMjBEaWZmdXNpb25QaXBlbGluZSUwQWltcG9ydCUyMHRvcmNoJTBBJTBBcHJpb3JfcGlwZWxpbmUlMjAlM0QlMjBEaWZmdXNpb25QaXBlbGluZS5mcm9tX3ByZXRyYWluZWQob3V0cHV0X2RpciUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiklMEFwcmlvcl9jb21wb25lbnRzJTIwJTNEJTIwJTdCJTIycHJpb3JfJTIyJTIwJTJCJTIwayUzQSUyMHYlMjBmb3IlMjBrJTJDdiUyMGluJTIwcHJpb3JfcGlwZWxpbmUuY29tcG9uZW50cy5pdGVtcygpJTdEJTBBcGlwZWxpbmUlMjAlM0QlMjBBdXRvUGlwZWxpbmVGb3JUZXh0MkltYWdlLmZyb21fcHJldHJhaW5lZCglMjJrYW5kaW5za3ktY29tbXVuaXR5JTJGa2FuZGluc2t5LTItMi1kZWNvZGVyJTIyJTJDJTIwKipwcmlvcl9jb21wb25lbnRzJTJDJTIwdG9yY2hfZHR5cGUlM0R0b3JjaC5mbG9hdDE2KSUwQSUwQXBpcGUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEFwcm9tcHQlM0QlMjJBJTIwcm9ib3QlMjBuYXJ1dG8lMkMlMjA0ayUyMHBob3RvJTIyJTBBaW1hZ2UlMjAlM0QlMjBwaXBlbGluZShwcm9tcHQlM0Rwcm9tcHQlMkMlMjBuZWdhdGl2ZV9wcm9tcHQlM0RuZWdhdGl2ZV9wcm9tcHQpLmltYWdlcyU1QjAlNUQ=",highlighted:`<span class="hljs-keyword">from</span> diffusers <span 
class="hljs-keyword">import</span> AutoPipelineForText2Image, DiffusionPipeline
<span class="hljs-keyword">import</span> torch
prior_pipeline = DiffusionPipeline.from_pretrained(output_dir, torch_dtype=torch.float16)
prior_components = {<span class="hljs-string">&quot;prior_&quot;</span> + k: v <span class="hljs-keyword">for</span> k,v <span class="hljs-keyword">in</span> prior_pipeline.components.items()}
pipeline = AutoPipelineForText2Image.from_pretrained(<span class="hljs-string">&quot;kandinsky-community/kandinsky-2-2-decoder&quot;</span>, **prior_components, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()
prompt=<span class="hljs-string">&quot;A robot naruto, 4k photo&quot;</span>
image = pipeline(prompt=prompt, negative_prompt=negative_prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),{c(){U(a.$$.fragment),M=i(),p=u("blockquote"),p.innerHTML=f,this.h()},l(o){J(a.$$.fragment,o),M=r(o),p=b(o,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),_(p)!=="svelte-16tklfl"&&(p.innerHTML=f),this.h()},h(){je(p,"class","tip")},m(o,y){h(a,o,y),l(o,M,y),l(o,p,y),n=!0},p:We,i(o){n||($(a.$$.fragment,o),n=!0)},o(o){T(a.$$.fragment,o),n=!1},d(o){o&&(t(M),t(p)),w(a,o)}}}function Pt(C){let a,M,p,f="对于 decoder 模型,您还可以从保存的检查点进行推理,这对于查看中间结果很有用。在这种情况下,将检查点加载到 UNet 中:",n,o,y;return a=new G({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEF1dG9QaXBlbGluZUZvclRleHQySW1hZ2UlMEFpbXBvcnQlMjB0b3JjaCUwQSUwQXBpcGVsaW5lJTIwJTNEJTIwQXV0b1BpcGVsaW5lRm9yVGV4dDJJbWFnZS5mcm9tX3ByZXRyYWluZWQoJTIycGF0aCUyRnRvJTJGc2F2ZWQlMkZtb2RlbCUyMiUyQyUyMHRvcmNoX2R0eXBlJTNEdG9yY2guZmxvYXQxNiklMEFwaXBlbGluZS5lbmFibGVfbW9kZWxfY3B1X29mZmxvYWQoKSUwQSUwQXByb21wdCUzRCUyMkElMjByb2JvdCUyMG5hcnV0byUyQyUyMDRrJTIwcGhvdG8lMjIlMEFpbWFnZSUyMCUzRCUyMHBpcGVsaW5lKHByb21wdCUzRHByb21wdCkuaW1hZ2VzJTVCMCU1RA==",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForText2Image
<span class="hljs-keyword">import</span> torch
pipeline = AutoPipelineForText2Image.from_pretrained(<span class="hljs-string">&quot;path/to/saved/model&quot;</span>, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
prompt=<span class="hljs-string">&quot;A robot naruto, 4k photo&quot;</span>
image = pipeline(prompt=prompt).images[<span class="hljs-number">0</span>]`,wrap:!1}}),o=new G({props:{code:"ZnJvbSUyMGRpZmZ1c2VycyUyMGltcG9ydCUyMEF1dG9QaXBlbGluZUZvclRleHQySW1hZ2UlMkMlMjBVTmV0MkRDb25kaXRpb25Nb2RlbCUwQSUwQXVuZXQlMjAlM0QlMjBVTmV0MkRDb25kaXRpb25Nb2RlbC5mcm9tX3ByZXRyYWluZWQoJTIycGF0aCUyRnRvJTJGc2F2ZWQlMkZtb2RlbCUyMiUyMCUyQiUyMCUyMiUyRmNoZWNrcG9pbnQtJTNDTiUzRSUyRnVuZXQlMjIpJTBBJTBBcGlwZWxpbmUlMjAlM0QlMjBBdXRvUGlwZWxpbmVGb3JUZXh0MkltYWdlLmZyb21fcHJldHJhaW5lZCglMjJrYW5kaW5za3ktY29tbXVuaXR5JTJGa2FuZGluc2t5LTItMi1kZWNvZGVyJTIyJTJDJTIwdW5ldCUzRHVuZXQlMkMlMjB0b3JjaF9kdHlwZSUzRHRvcmNoLmZsb2F0MTYpJTBBcGlwZWxpbmUuZW5hYmxlX21vZGVsX2NwdV9vZmZsb2FkKCklMEElMEFpbWFnZSUyMCUzRCUyMHBpcGVsaW5lKHByb21wdCUzRCUyMkElMjByb2JvdCUyMG5hcnV0byUyQyUyMDRrJTIwcGhvdG8lMjIpLmltYWdlcyU1QjAlNUQ=",highlighted:`<span class="hljs-keyword">from</span> diffusers <span class="hljs-keyword">import</span> AutoPipelineForText2Image, UNet2DConditionModel
unet = UNet2DConditionModel.from_pretrained(<span class="hljs-string">&quot;path/to/saved/model&quot;</span> + <span class="hljs-string">&quot;/checkpoint-&lt;N&gt;/unet&quot;</span>)
pipeline = AutoPipelineForText2Image.from_pretrained(<span class="hljs-string">&quot;kandinsky-community/kandinsky-2-2-decoder&quot;</span>, unet=unet, torch_dtype=torch.float16)
pipeline.enable_model_cpu_offload()
image = pipeline(prompt=<span class="hljs-string">&quot;A robot naruto, 4k photo&quot;</span>).images[<span class="hljs-number">0</span>]`,wrap:!1}}),{c(){U(a.$$.fragment),M=i(),p=u("p"),p.textContent=f,n=i(),U(o.$$.fragment)},l(d){J(a.$$.fragment,d),M=r(d),p=b(d,"P",{"data-svelte-h":!0}),_(p)!=="svelte-1cv5kc7"&&(p.textContent=f),n=r(d),J(o.$$.fragment,d)},m(d,j){h(a,d,j),l(d,M,j),l(d,p,j),l(d,n,j),h(o,d,j),y=!0},p:We,i(d){y||($(a.$$.fragment,d),$(o.$$.fragment,d),y=!0)},o(d){T(a.$$.fragment,d),T(o.$$.fragment,d),y=!1},d(d){d&&(t(M),t(p),t(n)),w(a,d),w(o,d)}}}function Dt(C){let a,M,p,f;return a=new ge({props:{id:"training-inference",option:"prior model",$$slots:{default:[St]},$$scope:{ctx:C}}}),p=new ge({props:{id:"training-inference",option:"decoder model",$$slots:{default:[Pt]},$$scope:{ctx:C}}}),{c(){U(a.$$.fragment),M=i(),U(p.$$.fragment)},l(n){J(a.$$.fragment,n),M=r(n),J(p.$$.fragment,n)},m(n,o){h(a,n,o),l(n,M,o),h(p,n,o),f=!0},p(n,o){const y={};o&2&&(y.$$scope={dirty:o,ctx:n}),a.$set(y);const d={};o&2&&(d.$$scope={dirty:o,ctx:n}),p.$set(d)},i(n){f||($(a.$$.fragment,n),$(p.$$.fragment,n),f=!0)},o(n){T(a.$$.fragment,n),T(p.$$.fragment,n),f=!1},d(n){n&&t(M),w(a,n),w(p,n)}}}function Kt(C){let a,M,p,f,n,o,y,d,j,E="<p>此脚本是实验性的,容易过拟合并遇到灾难性遗忘等问题。尝试探索不同的超参数以在您的数据集上获得最佳结果。</p>",X,W,N="Kandinsky 2.2 是一个多语言文本到图像模型,能够生成更逼真的图像。该模型包括一个图像先验模型,用于从文本提示创建图像嵌入,以及一个解码器模型,基于先验模型的嵌入生成图像。这就是为什么在 Diffusers 中您会找到两个独立的脚本用于 Kandinsky 2.2,一个用于训练先验模型,另一个用于训练解码器模型。您可以分别训练这两个模型,但为了获得最佳结果,您应该同时训练先验和解码器模型。",k,B,Q='根据您的 GPU,您可能需要启用 <code>gradient_checkpointing</code>(⚠️ 不支持先验模型!)、<code>mixed_precision</code> 和 <code>gradient_accumulation_steps</code> 来帮助将模型装入内存并加速训练。您可以通过启用 <a href="../optimization/xformers">xFormers</a> 的内存高效注意力来进一步减少内存使用(版本 <a href="https://github.com/huggingface/diffusers/issues/2234#issuecomment-1416931212" rel="nofollow">v0.0.16</a> 在某些 GPU 上训练时失败,因此您可能需要安装开发版本)。',R,V,v='本指南探讨了 <a 
href="https://github.com/huggingface/diffusers/blob/main/examples/kandinsky2_2/text_to_image/train_text_to_image_prior.py" rel="nofollow">train_text_to_image_prior.py</a> 和 <a href="https://github.com/huggingface/diffusers/blob/main/examples/kandinsky2_2/text_to_image/train_text_to_image_decoder.py" rel="nofollow">train_text_to_image_decoder.py</a> 脚本,以帮助您更熟悉它,以及如何根据您的用例进行调整。',H,x,c="在运行脚本之前,请确保从源代码安装库:",Z,I,P,F,m="然后导航到包含训练脚本的示例文件夹,并安装脚本所需的依赖项:",g,D,ke,Y,Mt='<p>🤗 Accelerate 是一个帮助您在多个 GPU/TPU 上或使用混合精度进行训练的库。它会根据您的硬件和环境自动配置训练设置。查看 🤗 Accelerate 的 <a href="https://huggingface.co/docs/accelerate/quicktour" rel="nofollow">快速入门</a> 了解更多。</p>',Re,K,ft="初始化一个 🤗 Accelerate 环境:",Ge,O,xe,ee,dt="要设置一个默认的 🤗 Accelerate 环境而不选择任何配置:",Ve,te,ve,le,yt="或者,如果您的环境不支持交互式 shell,比如 notebook,您可以使用:",He,se,Ie,ne,ut='最后,如果您想在自己的数据集上训练模型,请查看 <a href="create_dataset">创建用于训练的数据集</a> 指南,了解如何创建与训练脚本兼容的数据集。',Xe,A,bt="<p>以下部分重点介绍了训练脚本中对于理解如何修改它很重要的部分,但并未详细涵盖脚本的每个方面。如果您有兴趣了解更多,请随时阅读脚本,并让我们知道您有任何疑问或顾虑。</p>",Be,ae,Ne,pe,_t='训练脚本提供了许多参数来帮助您自定义训练运行。所有参数及其描述都可以在 <a href="https://github.com/huggingface/diffusers/blob/6e68c71503682c8693cb5b06a4da4911dfd655ee/examples/kandinsky2_2/text_to_image/train_text_to_image_prior.py#L190" rel="nofollow"><code>parse_args()</code></a> 函数中找到。训练脚本为每个参数提供了默认值,例如训练批次大小和学习率,但如果您愿意,也可以在训练命令中设置自己的值。',Qe,ie,Ut="例如,要使用 fp16 格式的混合精度加速训练,请在训练命令中添加 <code>--mixed_precision</code> 参数:",Fe,re,Ee,oe,Jt='大多数参数与 <a href="text2image#script-parameters">文本到图像</a> 训练指南中的参数相同,所以让我们直接进入 Kandinsky 训练脚本的 walkthrough!',Ye,me,Ae,ce,ht='<a href="https://huggingface.co/papers/2303.09556" rel="nofollow">Min-SNR</a> 加权策略可以通过重新平衡损失来帮助训练,实现更快的收敛。训练脚本支持预测 <code>epsilon</code>(噪声)或 <code>v_prediction</code>,但 Min-SNR 与两种预测类型都兼容。此加权策略仅由 PyTorch 支持,在 Flax 训练脚本中不可用。',Le,Me,$t="添加 <code>--snr_gamma</code> 参数并将其设置为推荐值 5.0:",ze,fe,qe,de,Se,ye,Tt='训练脚本也类似于 <a href="text2image#training-script">文本到图像</a> 训练指南,但已修改以支持训练 prior 和 decoder 模型。本指南重点介绍 Kandinsky 2.2 
训练脚本中独特的代码。',Pe,L,De,ue,Ke,be,wt="一旦您完成了所有更改或接受默认配置,就可以启动训练脚本了!🚀",Oe,_e,jt='您将在<a href="https://huggingface.co/datasets/lambdalabs/naruto-blip-captions" rel="nofollow">Naruto BLIP 字幕</a>数据集上进行训练,以生成您自己的Naruto角色,但您也可以通过遵循<a href="create_dataset">创建用于训练的数据集</a>指南来创建和训练您自己的数据集。将环境变量 <code>DATASET_NAME</code> 设置为Hub上数据集的名称,或者如果您在自己的文件上训练,将环境变量 <code>TRAIN_DIR</code> 设置为数据集的路径。',et,Ue,Zt="如果您在多个GPU上训练,请在 <code>accelerate launch</code> 命令中添加 <code>--multi_gpu</code> 参数。",tt,z,gt=`<p>要使用Weights &amp; Biases监控训练进度,请在训练命令中添加 <code>--report_to=wandb</code> 参数。您还需要
建议在训练命令中添加 <code>--validation_prompt</code> 以跟踪结果。这对于调试模型和查看中间结果非常有用。</p>`,lt,q,st,Je,Wt="训练完成后,您可以使用新训练的模型进行推理!",nt,S,at,he,pt,$e,Ct="恭喜您训练了一个 Kandinsky 2.2 模型!要了解更多关于如何使用您的新模型的信息,以下指南可能会有所帮助:",it,Te,kt='<li>阅读 <a href="../using-diffusers/kandinsky">Kandinsky</a> 指南,学习如何将其用于各种不同的任务(文本到图像、图像到图像、修复、插值),以及如何与 ControlNet 结合使用。</li> <li>查看 <a href="dreambooth">DreamBooth</a> 和 <a href="lora">LoRA</a> 训练指南,学习如何使用少量示例图像训练个性化的 Kandinsky 模型。这两种训练技术甚至可以结合使用!</li>',rt,we,ot,Ce,mt;return n=new Qt({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),y=new Ze({props:{title:"Kandinsky 2.2",local:"kandinsky-22",headingTag:"h1"}}),I=new G({props:{code:"Z2l0JTIwY2xvbmUlMjBodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZodWdnaW5nZmFjZSUyRmRpZmZ1c2VycyUwQWNkJTIwZGlmZnVzZXJzJTBBcGlwJTIwaW5zdGFsbCUyMC4=",highlighted:`git <span class="hljs-built_in">clone</span> https://github.com/huggingface/diffusers
<span class="hljs-built_in">cd</span> diffusers
pip install .`,wrap:!1}}),D=new G({props:{code:"Y2QlMjBleGFtcGxlcyUyRmthbmRpbnNreTJfMiUyRnRleHRfdG9faW1hZ2UlMEFwaXAlMjBpbnN0YWxsJTIwLXIlMjByZXF1aXJlbWVudHMudHh0",highlighted:`<span class="hljs-built_in">cd</span> examples/kandinsky2_2/text_to_image
pip install -r requirements.txt`,wrap:!1}}),O=new G({props:{code:"YWNjZWxlcmF0ZSUyMGNvbmZpZw==",highlighted:"accelerate config",wrap:!1}}),te=new G({props:{code:"YWNjZWxlcmF0ZSUyMGNvbmZpZyUyMGRlZmF1bHQ=",highlighted:"accelerate config default",wrap:!1}}),se=new G({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUudXRpbHMlMjBpbXBvcnQlMjB3cml0ZV9iYXNpY19jb25maWclMEElMEF3cml0ZV9iYXNpY19jb25maWcoKQ==",highlighted:`<span class="hljs-keyword">from</span> accelerate.utils <span class="hljs-keyword">import</span> write_basic_config
write_basic_config()`,wrap:!1}}),ae=new Ze({props:{title:"脚本参数",local:"脚本参数",headingTag:"h2"}}),re=new G({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMHRyYWluX3RleHRfdG9faW1hZ2VfcHJpb3IucHklMjAlNUMlMEElMjAlMjAtLW1peGVkX3ByZWNpc2lvbiUzRCUyMmZwMTYlMjI=",highlighted:`accelerate launch train_text_to_image_prior.py \\
--mixed_precision=<span class="hljs-string">&quot;fp16&quot;</span>`,wrap:!1}}),me=new Ze({props:{title:"Min-SNR 加权",local:"min-snr-加权",headingTag:"h3"}}),fe=new G({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMHRyYWluX3RleHRfdG9faW1hZ2VfcHJpb3IucHklMjAlNUMlMEElMjAlMjAtLXNucl9nYW1tYSUzRDUuMA==",highlighted:`accelerate launch train_text_to_image_prior.py \\
--snr_gamma=5.0`,wrap:!1}}),de=new Ze({props:{title:"训练脚本",local:"训练脚本",headingTag:"h2"}}),L=new ct({props:{id:"script",options:["prior model","decoder model"],$$slots:{default:[At]},$$scope:{ctx:C}}}),ue=new Ze({props:{title:"启动脚本",local:"启动脚本",headingTag:"h2"}}),q=new ct({props:{id:"training-inference",options:["prior model","decoder model"],$$slots:{default:[qt]},$$scope:{ctx:C}}}),S=new ct({props:{id:"training-inference",options:["prior model","decoder model"],$$slots:{default:[Dt]},$$scope:{ctx:C}}}),he=new Ze({props:{title:"后续步骤",local:"后续步骤",headingTag:"h2"}}),we=new Ft({props:{source:"https://github.com/huggingface/diffusers/blob/main/docs/source/zh/training/kandinsky.md"}}),{c(){a=u("meta"),M=i(),p=u("p"),f=i(),U(n.$$.fragment),o=i(),U(y.$$.fragment),d=i(),j=u("blockquote"),j.innerHTML=E,X=i(),W=u("p"),W.textContent=N,k=i(),B=u("p"),B.innerHTML=Q,R=i(),V=u("p"),V.innerHTML=v,H=i(),x=u("p"),x.textContent=c,Z=i(),U(I.$$.fragment),P=i(),F=u("p"),F.textContent=m,g=i(),U(D.$$.fragment),ke=i(),Y=u("blockquote"),Y.innerHTML=Mt,Re=i(),K=u("p"),K.textContent=ft,Ge=i(),U(O.$$.fragment),xe=i(),ee=u("p"),ee.textContent=dt,Ve=i(),U(te.$$.fragment),ve=i(),le=u("p"),le.textContent=yt,He=i(),U(se.$$.fragment),Ie=i(),ne=u("p"),ne.innerHTML=ut,Xe=i(),A=u("blockquote"),A.innerHTML=bt,Be=i(),U(ae.$$.fragment),Ne=i(),pe=u("p"),pe.innerHTML=_t,Qe=i(),ie=u("p"),ie.innerHTML=Ut,Fe=i(),U(re.$$.fragment),Ee=i(),oe=u("p"),oe.innerHTML=Jt,Ye=i(),U(me.$$.fragment),Ae=i(),ce=u("p"),ce.innerHTML=ht,Le=i(),Me=u("p"),Me.innerHTML=$t,ze=i(),U(fe.$$.fragment),qe=i(),U(de.$$.fragment),Se=i(),ye=u("p"),ye.innerHTML=Tt,Pe=i(),U(L.$$.fragment),De=i(),U(ue.$$.fragment),Ke=i(),be=u("p"),be.textContent=wt,Oe=i(),_e=u("p"),_e.innerHTML=jt,et=i(),Ue=u("p"),Ue.innerHTML=Zt,tt=i(),z=u("blockquote"),z.innerHTML=gt,lt=i(),U(q.$$.fragment),st=i(),Je=u("p"),Je.textContent=Wt,nt=i(),U(S.$$.fragment),at=i(),U(he.$$.fragment),pt=i(),$e=u("p"),$e.textContent=Ct,it=i(),Te=u("ul"),Te.innerHTML=kt,rt=i(),U(we.$$.
fragment),ot=i(),Ce=u("p"),this.h()},l(e){const s=Bt("svelte-u9bgzb",document.head);a=b(s,"META",{name:!0,content:!0}),s.forEach(t),M=r(e),p=b(e,"P",{}),Vt(p).forEach(t),f=r(e),J(n.$$.fragment,e),o=r(e),J(y.$$.fragment,e),d=r(e),j=b(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),_(j)!=="svelte-10u3mwr"&&(j.innerHTML=E),X=r(e),W=b(e,"P",{"data-svelte-h":!0}),_(W)!=="svelte-1rjrzwn"&&(W.textContent=N),k=r(e),B=b(e,"P",{"data-svelte-h":!0}),_(B)!=="svelte-1wrjuq9"&&(B.innerHTML=Q),R=r(e),V=b(e,"P",{"data-svelte-h":!0}),_(V)!=="svelte-183ogqr"&&(V.innerHTML=v),H=r(e),x=b(e,"P",{"data-svelte-h":!0}),_(x)!=="svelte-sgtnc7"&&(x.textContent=c),Z=r(e),J(I.$$.fragment,e),P=r(e),F=b(e,"P",{"data-svelte-h":!0}),_(F)!=="svelte-9jjxff"&&(F.textContent=m),g=r(e),J(D.$$.fragment,e),ke=r(e),Y=b(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),_(Y)!=="svelte-18peymk"&&(Y.innerHTML=Mt),Re=r(e),K=b(e,"P",{"data-svelte-h":!0}),_(K)!=="svelte-1cwu974"&&(K.textContent=ft),Ge=r(e),J(O.$$.fragment,e),xe=r(e),ee=b(e,"P",{"data-svelte-h":!0}),_(ee)!=="svelte-6b3ch3"&&(ee.textContent=dt),Ve=r(e),J(te.$$.fragment,e),ve=r(e),le=b(e,"P",{"data-svelte-h":!0}),_(le)!=="svelte-w9j6kh"&&(le.textContent=yt),He=r(e),J(se.$$.fragment,e),Ie=r(e),ne=b(e,"P",{"data-svelte-h":!0}),_(ne)!=="svelte-1jgrevq"&&(ne.innerHTML=ut),Xe=r(e),A=b(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),_(A)!=="svelte-xb53wy"&&(A.innerHTML=bt),Be=r(e),J(ae.$$.fragment,e),Ne=r(e),pe=b(e,"P",{"data-svelte-h":!0}),_(pe)!=="svelte-dxr8zk"&&(pe.innerHTML=_t),Qe=r(e),ie=b(e,"P",{"data-svelte-h":!0}),_(ie)!=="svelte-1q1qn86"&&(ie.innerHTML=Ut),Fe=r(e),J(re.$$.fragment,e),Ee=r(e),oe=b(e,"P",{"data-svelte-h":!0}),_(oe)!=="svelte-1sjmcjj"&&(oe.innerHTML=Jt),Ye=r(e),J(me.$$.fragment,e),Ae=r(e),ce=b(e,"P",{"data-svelte-h":!0}),_(ce)!=="svelte-1uzpqtn"&&(ce.innerHTML=ht),Le=r(e),Me=b(e,"P",{"data-svelte-h":!0}),_(Me)!=="svelte-1dvfh3c"&&(Me.innerHTML=$t),ze=r(e),J(fe.$$.fragment,e),qe=r(e),J(de.$$.fragment,e),Se=r(e),ye=b(e,"P",{"data-svelte-h":!
0}),_(ye)!=="svelte-1ihvk28"&&(ye.innerHTML=Tt),Pe=r(e),J(L.$$.fragment,e),De=r(e),J(ue.$$.fragment,e),Ke=r(e),be=b(e,"P",{"data-svelte-h":!0}),_(be)!=="svelte-j4yqef"&&(be.textContent=wt),Oe=r(e),_e=b(e,"P",{"data-svelte-h":!0}),_(_e)!=="svelte-1qykf4g"&&(_e.innerHTML=jt),et=r(e),Ue=b(e,"P",{"data-svelte-h":!0}),_(Ue)!=="svelte-14drp3g"&&(Ue.innerHTML=Zt),tt=r(e),z=b(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),_(z)!=="svelte-1lfotjt"&&(z.innerHTML=gt),lt=r(e),J(q.$$.fragment,e),st=r(e),Je=b(e,"P",{"data-svelte-h":!0}),_(Je)!=="svelte-2s2vlp"&&(Je.textContent=Wt),nt=r(e),J(S.$$.fragment,e),at=r(e),J(he.$$.fragment,e),pt=r(e),$e=b(e,"P",{"data-svelte-h":!0}),_($e)!=="svelte-10blbi9"&&($e.textContent=Ct),it=r(e),Te=b(e,"UL",{"data-svelte-h":!0}),_(Te)!=="svelte-g3c9xr"&&(Te.innerHTML=kt),rt=r(e),J(we.$$.fragment,e),ot=r(e),Ce=b(e,"P",{}),Vt(Ce).forEach(t),this.h()},h(){je(a,"name","hf:doc:metadata"),je(a,"content",Ot),je(j,"class","warning"),je(Y,"class","tip"),je(A,"class","tip"),je(z,"class","tip")},m(e,s){Nt(document.head,a),l(e,M,s),l(e,p,s),l(e,f,s),h(n,e,s),l(e,o,s),h(y,e,s),l(e,d,s),l(e,j,s),l(e,X,s),l(e,W,s),l(e,k,s),l(e,B,s),l(e,R,s),l(e,V,s),l(e,H,s),l(e,x,s),l(e,Z,s),h(I,e,s),l(e,P,s),l(e,F,s),l(e,g,s),h(D,e,s),l(e,ke,s),l(e,Y,s),l(e,Re,s),l(e,K,s),l(e,Ge,s),h(O,e,s),l(e,xe,s),l(e,ee,s),l(e,Ve,s),h(te,e,s),l(e,ve,s),l(e,le,s),l(e,He,s),h(se,e,s),l(e,Ie,s),l(e,ne,s),l(e,Xe,s),l(e,A,s),l(e,Be,s),h(ae,e,s),l(e,Ne,s),l(e,pe,s),l(e,Qe,s),l(e,ie,s),l(e,Fe,s),h(re,e,s),l(e,Ee,s),l(e,oe,s),l(e,Ye,s),h(me,e,s),l(e,Ae,s),l(e,ce,s),l(e,Le,s),l(e,Me,s),l(e,ze,s),h(fe,e,s),l(e,qe,s),h(de,e,s),l(e,Se,s),l(e,ye,s),l(e,Pe,s),h(L,e,s),l(e,De,s),h(ue,e,s),l(e,Ke,s),l(e,be,s),l(e,Oe,s),l(e,_e,s),l(e,et,s),l(e,Ue,s),l(e,tt,s),l(e,z,s),l(e,lt,s),h(q,e,s),l(e,st,s),l(e,Je,s),l(e,nt,s),h(S,e,s),l(e,at,s),h(he,e,s),l(e,pt,s),l(e,$e,s),l(e,it,s),l(e,Te,s),l(e,rt,s),h(we,e,s),l(e,ot,s),l(e,Ce,s),mt=!0},p(e,[s]){const Rt={};s&2&&(Rt.$$scope={dirty:s,ctx:e}),L.$set(Rt);const 
Gt={};s&2&&(Gt.$$scope={dirty:s,ctx:e}),q.$set(Gt);const xt={};s&2&&(xt.$$scope={dirty:s,ctx:e}),S.$set(xt)},i(e){mt||($(n.$$.fragment,e),$(y.$$.fragment,e),$(I.$$.fragment,e),$(D.$$.fragment,e),$(O.$$.fragment,e),$(te.$$.fragment,e),$(se.$$.fragment,e),$(ae.$$.fragment,e),$(re.$$.fragment,e),$(me.$$.fragment,e),$(fe.$$.fragment,e),$(de.$$.fragment,e),$(L.$$.fragment,e),$(ue.$$.fragment,e),$(q.$$.fragment,e),$(S.$$.fragment,e),$(he.$$.fragment,e),$(we.$$.fragment,e),mt=!0)},o(e){T(n.$$.fragment,e),T(y.$$.fragment,e),T(I.$$.fragment,e),T(D.$$.fragment,e),T(O.$$.fragment,e),T(te.$$.fragment,e),T(se.$$.fragment,e),T(ae.$$.fragment,e),T(re.$$.fragment,e),T(me.$$.fragment,e),T(fe.$$.fragment,e),T(de.$$.fragment,e),T(L.$$.fragment,e),T(ue.$$.fragment,e),T(q.$$.fragment,e),T(S.$$.fragment,e),T(he.$$.fragment,e),T(we.$$.fragment,e),mt=!1},d(e){e&&(t(M),t(p),t(f),t(o),t(d),t(j),t(X),t(W),t(k),t(B),t(R),t(V),t(H),t(x),t(Z),t(P),t(F),t(g),t(ke),t(Y),t(Re),t(K),t(Ge),t(xe),t(ee),t(Ve),t(ve),t(le),t(He),t(Ie),t(ne),t(Xe),t(A),t(Be),t(Ne),t(pe),t(Qe),t(ie),t(Fe),t(Ee),t(oe),t(Ye),t(Ae),t(ce),t(Le),t(Me),t(ze),t(qe),t(Se),t(ye),t(Pe),t(De),t(Ke),t(be),t(Oe),t(_e),t(et),t(Ue),t(tt),t(z),t(lt),t(st),t(Je),t(nt),t(at),t(pt),t($e),t(it),t(Te),t(rt),t(ot),t(Ce)),t(a),w(n,e),w(y,e),w(I,e),w(D,e),w(O,e),w(te,e),w(se,e),w(ae,e),w(re,e),w(me,e),w(fe,e),w(de,e),w(L,e),w(ue,e),w(q,e),w(S,e),w(he,e),w(we,e)}}}
/*
 * Ot: JSON-encoded table of contents for this page (title "Kandinsky 2.2" plus
 * its nested sections, each with a `local` anchor id and a `depth`). It is
 * written verbatim into the `content` attribute of the
 * <meta name="hf:doc:metadata"> element by the fragment's h() hook.
 * This is runtime data: do not reformat or translate the string.
 */
const Ot='{"title":"Kandinsky 2.2","local":"kandinsky-22","sections":[{"title":"脚本参数","local":"脚本参数","sections":[{"title":"Min-SNR 加权","local":"min-snr-加权","sections":[],"depth":3}],"depth":2},{"title":"训练脚本","local":"训练脚本","sections":[],"depth":2},{"title":"启动脚本","local":"启动脚本","sections":[],"depth":2},{"title":"后续步骤","local":"后续步骤","sections":[],"depth":2}],"depth":1}';
/*
 * el: instance-setup function handed to Xt() by the component class below.
 * It registers a callback through Ht (imported as `o` from the scheduler chunk;
 * presumably Svelte's onMount, TODO confirm). The callback reads the "fw"
 * query-string parameter from window.location.search and discards the value.
 * The parameter C is unused, and the function returns an empty array.
 */
function el(C){return Ht(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/*
 * pl: the page component, exported under the name `component`.
 * Extends It (imported as `S` from the index chunk; presumably SvelteComponent,
 * TODO confirm). The constructor forwards its options argument `a` to Xt
 * together with the instance function el, the fragment factory Kt (defined
 * earlier in this file, outside this excerpt), the helper vt, and an empty
 * object as the final argument.
 */
class pl extends It{constructor(a){super(),Xt(this,a,el,Kt,vt,{})}}export{pl as component};

Xet Storage Details

Size:
42.5 kB
·
Xet hash:
69c7b6e188afe7884a7d917ab858b0f3378a8bc20c5e1b033c4f62ce9caa9977

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.