Buckets:

HuggingFaceDocBuilder's picture
download
raw
5.52 kB
// Compiled page module for the "TPU training" documentation page.
// NOTE(review): this looks like Svelte compiler output (minified helper aliases,
// `$$.fragment`, `data-svelte-h` markers). The helper semantics described below
// are inferred from that convention and should be confirmed against
// ../chunks/index.*.js before relying on them.
import { s as te, n as ie, o as ae } from "../chunks/scheduler.b9285784.js";
import { S as ne, i as se, e as o, s as n, c as _, h as le, a as r, d as i, b as s, f as O, g as b, j as v, k as ee, l as oe, m as a, n as P, t as x, o as C, p as U } from "../chunks/index.26bc89a1.js";
import { C as re, H as Y, E as pe } from "../chunks/MermaidChart.svelte_svelte_type_style_lang.7a0ae628.js";
import { C as me } from "../chunks/CodeBlock.844ff9c3.js";

/**
 * Builds the fragment object for the page: a set of lifecycle callbacks
 * (`c`, `l`, `h`, `m`, `p`, `i`, `o`, `d`) that close over the nodes and child
 * components declared below.
 *
 * @param {*} j - Context argument supplied by the caller; unused in this body.
 * @returns {object} Fragment object consumed by `se(...)` in the `ye` constructor.
 */
function fe(j) {
  // Node / child-component handles, assigned by `c()` or `l()`.
  let l, L, M, H, p, z, m, E, f, R, h, S, c, A, u, B, g, G, d, J, $, N, y, V, w, W, T, X, k;
  // Guard flag: set in `m`/`i`, cleared in `o`.
  let Z;

  // Static page content. These are runtime strings and must stay byte-exact.
  const q = 'A <a href="https://cloud.google.com/tpu/docs/intro-to-tpu" rel="nofollow">TPU (Tensor Processing Unit)</a> is a type of hardware specifically designed for training models efficiently. Accelerate supports TPU training, but there are a few things you should be aware of, namely graph compilation. This tutorial briefly discusses compilation, and for more details, take a look at the <a href="../concept_guides/training_tpu">Training on TPUs with Accelerate</a> guide.';
  const Q = "A TPU creates a graph of all the operations in the training step such as the forward pass, backward pass and optimizer step. This is why the first training step always takes a while because building and compiling this graph takes time. But once compilation is complete, it is cached and all subsequent steps are much faster.";
  const D = "The key is to avoid compiling your code again or else training is super slow. This means all your operations must be exactly the same:";
  const I = "<li>all tensors in your batches must have the same length (for example, no dynamic padding for NLP tasks)</li> <li>your code must be static (for example, no layers with for loops that have different lengths depending on the input such as a LSTM)</li>";
  // Kept on a single line: a raw line break inside a quoted string literal is a
  // syntax error.
  const F = 'A common language model design is to tie the weights of the embedding and softmax layers. However, moving the model to a TPU (either yourself or passing it to the <a href="/docs/accelerate/pr_4021/en/package_reference/accelerator#accelerate.Accelerator.prepare">prepare()</a> method) breaks the weight tying and you’ll need to retie the weights.';
  const K = 'To add special behavior (like weight tying) in your script for TPUs, set <code>distributed_type</code> to <code>DistributedType.TPU</code> first. Then you can use the <a href="https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.tie_weights" rel="nofollow">tie_weights</a> method to tie the weights.';

  // Child components, in page order.
  p = new re({
    props: {
      containerStyle: "float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;",
    },
  });
  m = new Y({ props: { title: "TPU training", local: "tpu-training", headingTag: "h1" } });
  h = new Y({ props: { title: "Compilation", local: "compilation", headingTag: "h2" } });
  d = new Y({ props: { title: "Weight tying", local: "weight-tying", headingTag: "h2" } });
  w = new me({
    props: {
      // Base64 of the URI-encoded snippet; it decodes to a two-line snippet whose
      // second line is indented by four spaces.
      code: "aWYlMjBhY2NlbGVyYXRvci5kaXN0cmlidXRlZF90eXBlJTIwJTNEJTNEJTIwRGlzdHJpYnV0ZWRUeXBlLlRQVSUzQSUwQSUyMCUyMCUyMCUyMG1vZGVsLnRpZV93ZWlnaHRzKCk=",
      // Explicit "\n" plus four spaces so the highlighted text matches `code`
      // and the indentation cannot be stripped by whitespace-trimming tools.
      highlighted: '<span class="hljs-keyword">if</span> accelerator.distributed_type == DistributedType.TPU:\n    model.tie_weights()',
      wrap: false,
    },
  });
  T = new pe({
    props: {
      source: "https://github.com/huggingface/accelerate/blob/main/docs/source/basic_tutorials/tpu.md",
    },
  });

  return {
    // Creates every node from scratch (presumably the client-side render path).
    c() {
      l = o("meta");
      L = n();
      M = o("p");
      H = n();
      _(p.$$.fragment);
      z = n();
      _(m.$$.fragment);
      E = n();
      f = o("p");
      f.innerHTML = q;
      R = n();
      _(h.$$.fragment);
      S = n();
      c = o("p");
      c.textContent = Q;
      A = n();
      u = o("p");
      u.textContent = D;
      B = n();
      g = o("ul");
      g.innerHTML = I;
      G = n();
      _(d.$$.fragment);
      J = n();
      $ = o("p");
      $.innerHTML = F;
      N = n();
      y = o("p");
      y.innerHTML = K;
      V = n();
      _(w.$$.fragment);
      W = n();
      _(T.$$.fragment);
      X = n();
      k = o("p");
      this.h();
    },

    // Same node sequence as `c()`, but built from the existing nodes in `e`
    // (presumably hydration of server-rendered markup — confirm). Static content
    // is rewritten only when the value returned by `v(node)` differs from the
    // expected marker.
    l(e) {
      const t = le("svelte-u9bgzb", document.head);
      l = r(t, "META", { name: true, content: true });
      t.forEach(i);
      L = s(e);
      M = r(e, "P", {});
      O(M).forEach(i);
      H = s(e);
      b(p.$$.fragment, e);
      z = s(e);
      b(m.$$.fragment, e);
      E = s(e);
      f = r(e, "P", { "data-svelte-h": true });
      if (v(f) !== "svelte-1efoa6g") {
        f.innerHTML = q;
      }
      R = s(e);
      b(h.$$.fragment, e);
      S = s(e);
      c = r(e, "P", { "data-svelte-h": true });
      if (v(c) !== "svelte-76he8q") {
        c.textContent = Q;
      }
      A = s(e);
      u = r(e, "P", { "data-svelte-h": true });
      if (v(u) !== "svelte-1ga0n4t") {
        u.textContent = D;
      }
      B = s(e);
      g = r(e, "UL", { "data-svelte-h": true });
      if (v(g) !== "svelte-fxztf") {
        g.innerHTML = I;
      }
      G = s(e);
      b(d.$$.fragment, e);
      J = s(e);
      $ = r(e, "P", { "data-svelte-h": true });
      if (v($) !== "svelte-15hzni8") {
        $.innerHTML = F;
      }
      N = s(e);
      y = r(e, "P", { "data-svelte-h": true });
      if (v(y) !== "svelte-w62ir7") {
        y.innerHTML = K;
      }
      V = s(e);
      b(w.$$.fragment, e);
      W = s(e);
      b(T.$$.fragment, e);
      X = s(e);
      k = r(e, "P", {});
      O(k).forEach(i);
      this.h();
    },

    // Sets the <meta> attributes; shared tail of `c()` and `l()`.
    h() {
      ee(l, "name", "hf:doc:metadata");
      ee(l, "content", he);
    },

    // Inserts the <meta> into document.head and everything else into `e`
    // (with `t` passed through as the third argument), in page order.
    m(e, t) {
      oe(document.head, l);
      a(e, L, t);
      a(e, M, t);
      a(e, H, t);
      P(p, e, t);
      a(e, z, t);
      P(m, e, t);
      a(e, E, t);
      a(e, f, t);
      a(e, R, t);
      P(h, e, t);
      a(e, S, t);
      a(e, c, t);
      a(e, A, t);
      a(e, u, t);
      a(e, B, t);
      a(e, g, t);
      a(e, G, t);
      P(d, e, t);
      a(e, J, t);
      a(e, $, t);
      a(e, N, t);
      a(e, y, t);
      a(e, V, t);
      P(w, e, t);
      a(e, W, t);
      P(T, e, t);
      a(e, X, t);
      a(e, k, t);
      Z = true;
    },

    // Update hook: the page has no reactive state, so the imported `ie` is used
    // as-is (presumably a no-op — confirm).
    p: ie,

    // Runs `x` on each child fragment once; guarded by `Z` so repeated calls are
    // skipped until `o()` resets the flag.
    i(e) {
      if (Z) {
        return;
      }
      x(p.$$.fragment, e);
      x(m.$$.fragment, e);
      x(h.$$.fragment, e);
      x(d.$$.fragment, e);
      x(w.$$.fragment, e);
      x(T.$$.fragment, e);
      Z = true;
    },

    // Counterpart of `i()`: runs `C` on each child fragment and clears the flag.
    o(e) {
      C(p.$$.fragment, e);
      C(m.$$.fragment, e);
      C(h.$$.fragment, e);
      C(d.$$.fragment, e);
      C(w.$$.fragment, e);
      C(T.$$.fragment, e);
      Z = false;
    },

    // Teardown: body nodes are passed to `i` only when `e` is truthy; the <meta>
    // node and the child components are always torn down.
    d(e) {
      if (e) {
        i(L);
        i(M);
        i(H);
        i(z);
        i(E);
        i(f);
        i(R);
        i(S);
        i(c);
        i(A);
        i(u);
        i(B);
        i(g);
        i(G);
        i(J);
        i($);
        i(N);
        i(y);
        i(V);
        i(W);
        i(X);
        i(k);
      }
      i(l);
      U(p, e);
      U(m, e);
      U(h, e);
      U(d, e);
      U(w, e);
      U(T, e);
    },
  };
}

// JSON table of contents written into the `hf:doc:metadata` <meta> tag by `h()`.
const he = '{"title":"TPU training","local":"tpu-training","sections":[{"title":"Compilation","local":"compilation","sections":[],"depth":2},{"title":"Weight tying","local":"weight-tying","sections":[],"depth":2}],"depth":1}';

/**
 * Instance initializer passed to `se(...)`.
 *
 * Registers a callback through `ae` that reads the `fw` query parameter from the
 * current URL; the value is read and then discarded, exactly as in the generated
 * code.
 *
 * @param {*} j - Unused.
 * @returns {Array} Always an empty array.
 */
function ce(j) {
  ae(() => {
    new URLSearchParams(window.location.search).get("fw");
  });
  return [];
}

/** Page component: wires `ce` (instance) and `fe` (fragment) together via `se`. */
class ye extends ne {
  constructor(l) {
    super();
    se(this, l, ce, fe, te, {});
  }
}

export { ye as component };

Xet Storage Details

Size:
5.52 kB
·
Xet hash:
dc7b647b1ce5bac2c282ffdbce92ef29d4241210ccbc215361b18bbff439575c

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.