Buckets:

HuggingFaceDocBuilder's picture
download
raw
38.2 kB
import{s as Es,o as Cs,n as R}from"../chunks/scheduler.b9285784.js";import{S as Ws,i as Vs,e as b,s as i,c as d,h as Ys,a as u,d as n,b as w,f as _s,g as m,j as I,k as $s,l as vs,m as p,n as y,t as M,o as J,p as T}from"../chunks/index.26bc89a1.js";import{T as gs}from"../chunks/Tip.e4eba3d6.js";import{C as Fs,H as x,E as Qs}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.7a0ae628.js";import{C as X}from"../chunks/CodeBlock.844ff9c3.js";import{H as hs,a as K}from"../chunks/HfOption.76c7ca3e.js";function Ns(h){let a,o;return a=new X({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdG9yY2gubm4ucGFyYWxsZWwlMjBpbXBvcnQlMjBEaXN0cmlidXRlZERhdGFQYXJhbGxlbCUyMGFzJTIwRERQJTBBZnJvbSUyMHRvcmNoLmRpc3RyaWJ1dGVkLmFsZ29yaXRobXMuZGRwX2NvbW1faG9va3MlMjBpbXBvcnQlMjBkZWZhdWx0X2hvb2tzJTBBZnJvbSUyMGFjY2VsZXJhdGUudGVzdF91dGlscy50ZXN0aW5nJTIwaW1wb3J0JTIwZ2V0X2JhY2tlbmQlMEElMEFkZXZpY2VfdHlwZSUyQyUyMF8lMkMlMjBfJTIwJTNEJTIwZ2V0X2JhY2tlbmQoKSUwQWRldmljZV9pZCUyMCUzRCUyMGdldGF0dHIodG9yY2glMkMlMjBkZXZpY2VfdHlwZSUyQyUyMHRvcmNoLmN1ZGEpLmN1cnJlbnRfZGV2aWNlKCklMEElMEFjbGFzcyUyME15TW9kZWwodG9yY2gubm4uTW9kdWxlKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKHNlbGYpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3VwZXIoKS5fX2luaXRfXygpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2VsZi5sYXllciUyMCUzRCUyMHRvcmNoLm5uLkxpbmVhcigxMCUyQyUyMDEwKSUwQSUwQSUyMCUyMCUyMCUyMGRlZiUyMGZvcndhcmQoc2VsZiUyQyUyMHgpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwc2VsZi5sYXllcih4KSUwQSUwQW1vZGVsJTIwJTNEJTIwTXlNb2RlbCgpJTBBbW9kZWwlMjAlM0QlMjBERFAobW9kZWwlMkMlMjBkZXZpY2VfaWRzJTNEJTVCZGV2aWNlX2lkJTVEKSUwQW1vZGVsLnJlZ2lzdGVyX2NvbW1faG9vayhzdGF0ZSUzRE5vbmUlMkMlMjBob29rJTNEZGVmYXVsdF9ob29rcy5mcDE2X2NvbXByZXNzX2hvb2spJTBBJTBBJTIzJTIwVHJhaW5pbmclMjBsb29wJTBBZm9yJTIwZGF0YSUyQyUyMHRhcmdldHMlMjBpbiUyMGRhdGFfbG9hZGVyJTNBJTBBJTIwJTIwJTIwJTIwb3V0cHV0cyUyMCUzRCUyMG1vZGVsKGRhdGEpJTBBJTIwJTIwJTIwJTIwbG9zcyUyMCUzRCUyMGNyaXRlcmlvbihvdXRwdXRzJTJDJTIwdGFyZ2V0cyklMEElMjAlMjAlMjAlMjBsb3NzLmJhY2t3YXJkKCklMEElMjAlMjAlMjAlMjBvcHRpbWl6ZXIuc3RlcCgpJTBBJTIwJTIwJTIwJTIwb3B0aW1pemVyLnplcm9fZ3JhZCgp",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> torch.nn.parallel <span class="hljs-keyword">import</span> DistributedDataParallel <span class="hljs-keyword">as</span> DDP
<span class="hljs-keyword">from</span> torch.distributed.algorithms.ddp_comm_hooks <span class="hljs-keyword">import</span> default_hooks
<span class="hljs-keyword">from</span> accelerate.test_utils.testing <span class="hljs-keyword">import</span> get_backend
device_type, _, _ = get_backend()
device_id = <span class="hljs-built_in">getattr</span>(torch, device_type, torch.cuda).current_device()
<span class="hljs-keyword">class</span> <span class="hljs-title class_">MyModel</span>(torch.nn.Module):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>):
<span class="hljs-built_in">super</span>().__init__()
self.layer = torch.nn.Linear(<span class="hljs-number">10</span>, <span class="hljs-number">10</span>)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, x</span>):
<span class="hljs-keyword">return</span> self.layer(x)
model = MyModel()
model = DDP(model, device_ids=[device_id])
model.register_comm_hook(state=<span class="hljs-literal">None</span>, hook=default_hooks.fp16_compress_hook)
<span class="hljs-comment"># Training loop</span>
<span class="hljs-keyword">for</span> data, targets <span class="hljs-keyword">in</span> data_loader:
outputs = model(data)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
optimizer.zero_grad()`,wrap:!1}}),{c(){d(a.$$.fragment)},l(l){m(a.$$.fragment,l)},m(l,r){y(a,l,r),o=!0},p:R,i(l){o||(M(a.$$.fragment,l),o=!0)},o(l){J(a.$$.fragment,l),o=!1},d(l){T(a,l)}}}function Ss(h){let a,o;return a=new X({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUyQyUyMEREUENvbW11bmljYXRpb25Ib29rVHlwZSUyQyUyMERpc3RyaWJ1dGVkRGF0YVBhcmFsbGVsS3dhcmdzJTBBaW1wb3J0JTIwdG9yY2glMEElMEFjbGFzcyUyME15TW9kZWwodG9yY2gubm4uTW9kdWxlKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKHNlbGYpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3VwZXIoKS5fX2luaXRfXygpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2VsZi5sYXllciUyMCUzRCUyMHRvcmNoLm5uLkxpbmVhcigxMCUyQyUyMDEwKSUwQSUwQSUyMCUyMCUyMCUyMGRlZiUyMGZvcndhcmQoc2VsZiUyQyUyMHgpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwc2VsZi5sYXllcih4KSUwQSUwQSUyMyUyMEREUCUyMENvbW11bmljYXRpb24lMjBIb29rJTIwc2V0dXAlMEFkZHBfa3dhcmdzJTIwJTNEJTIwRGlzdHJpYnV0ZWREYXRhUGFyYWxsZWxLd2FyZ3MoY29tbV9ob29rJTNERERQQ29tbXVuaWNhdGlvbkhvb2tUeXBlLkZQMTYpJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcihrd2FyZ3NfaGFuZGxlcnMlM0QlNUJkZHBfa3dhcmdzJTVEKSUwQSUwQW1vZGVsJTIwJTNEJTIwTXlNb2RlbCgpJTBBb3B0aW1pemVyJTIwJTNEJTIwdG9yY2gub3B0aW0uQWRhbShtb2RlbC5wYXJhbWV0ZXJzKCkpJTBBZGF0YV9sb2FkZXIlMjAlM0QlMjBEYXRhTG9hZGVyKGRhdGFzZXQlMkMlMjBiYXRjaF9zaXplJTNEMTYpJTBBJTBBbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBkYXRhX2xvYWRlciUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUobW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBkYXRhX2xvYWRlciklMEElMEElMjMlMjBUcmFpbmluZyUyMGxvb3AlMEFmb3IlMjBkYXRhJTJDJTIwdGFyZ2V0cyUyMGluJTIwZGF0YV9sb2FkZXIlM0ElMEElMjAlMjAlMjAlMjBvdXRwdXRzJTIwJTNEJTIwbW9kZWwoZGF0YSklMEElMjAlMjAlMjAlMjBsb3NzJTIwJTNEJTIwY3JpdGVyaW9uKG91dHB1dHMlMkMlMjB0YXJnZXRzKSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3MpJTBBJTIwJTIwJTIwJTIwb3B0aW1pemVyLnN0ZXAoKSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKQ==",highlighted:`<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator, DDPCommunicationHookType, DistributedDataParallelKwargs
<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">class</span> <span class="hljs-title class_">MyModel</span>(torch.nn.Module):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>):
<span class="hljs-built_in">super</span>().__init__()
self.layer = torch.nn.Linear(<span class="hljs-number">10</span>, <span class="hljs-number">10</span>)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, x</span>):
<span class="hljs-keyword">return</span> self.layer(x)
<span class="hljs-comment"># DDP Communication Hook setup</span>
ddp_kwargs = DistributedDataParallelKwargs(comm_hook=DDPCommunicationHookType.FP16)
accelerator = Accelerator(kwargs_handlers=[ddp_kwargs])
model = MyModel()
optimizer = torch.optim.Adam(model.parameters())
data_loader = DataLoader(dataset, batch_size=<span class="hljs-number">16</span>)
model, optimizer, data_loader = accelerator.prepare(model, optimizer, data_loader)
<span class="hljs-comment"># Training loop</span>
<span class="hljs-keyword">for</span> data, targets <span class="hljs-keyword">in</span> data_loader:
outputs = model(data)
loss = criterion(outputs, targets)
accelerator.backward(loss)
optimizer.step()
optimizer.zero_grad()`,wrap:!1}}),{c(){d(a.$$.fragment)},l(l){m(a.$$.fragment,l)},m(l,r){y(a,l,r),o=!0},p:R,i(l){o||(M(a.$$.fragment,l),o=!0)},o(l){J(a.$$.fragment,l),o=!1},d(l){T(a,l)}}}function zs(h){let a,o,l,r;return a=new K({props:{id:"fp16",option:"PyTorch",$$slots:{default:[Ns]},$$scope:{ctx:h}}}),l=new K({props:{id:"fp16",option:"Accelerate",$$slots:{default:[Ss]},$$scope:{ctx:h}}}),{c(){d(a.$$.fragment),o=i(),d(l.$$.fragment)},l(e){m(a.$$.fragment,e),o=w(e),m(l.$$.fragment,e)},m(e,c){y(a,e,c),p(e,o,c),y(l,e,c),r=!0},p(e,c){const U={};c&2&&(U.$$scope={dirty:c,ctx:e}),a.$set(U);const f={};c&2&&(f.$$scope={dirty:c,ctx:e}),l.$set(f)},i(e){r||(M(a.$$.fragment,e),M(l.$$.fragment,e),r=!0)},o(e){J(a.$$.fragment,e),J(l.$$.fragment,e),r=!1},d(e){e&&n(o),T(a,e),T(l,e)}}}function Hs(h){let a,o="BF16 Compression Hook API is experimental, and it requires NCCL version later than 2.9.6.";return{c(){a=b("p"),a.textContent=o},l(l){a=u(l,"P",{"data-svelte-h":!0}),I(a)!=="svelte-1s0p8uc"&&(a.textContent=o)},m(l,r){p(l,a,r)},p:R,d(l){l&&n(a)}}}function Ds(h){let a,o;return a=new X({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdG9yY2gubm4ucGFyYWxsZWwlMjBpbXBvcnQlMjBEaXN0cmlidXRlZERhdGFQYXJhbGxlbCUyMGFzJTIwRERQJTBBZnJvbSUyMHRvcmNoLmRpc3RyaWJ1dGVkLmFsZ29yaXRobXMuZGRwX2NvbW1faG9va3MlMjBpbXBvcnQlMjBkZWZhdWx0X2hvb2tzJTBBZnJvbSUyMGFjY2VsZXJhdGUudGVzdF91dGlscy50ZXN0aW5nJTIwaW1wb3J0JTIwZ2V0X2JhY2tlbmQlMEElMEFkZXZpY2VfdHlwZSUyQyUyMF8lMkMlMjBfJTIwJTNEJTIwZ2V0X2JhY2tlbmQoKSUwQWRldmljZV9pZCUyMCUzRCUyMGdldGF0dHIodG9yY2glMkMlMjBkZXZpY2VfdHlwZSUyQyUyMHRvcmNoLmN1ZGEpLmN1cnJlbnRfZGV2aWNlKCklMEElMEFjbGFzcyUyME15TW9kZWwodG9yY2gubm4uTW9kdWxlKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKHNlbGYpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3VwZXIoKS5fX2luaXRfXygpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2VsZi5sYXllciUyMCUzRCUyMHRvcmNoLm5uLkxpbmVhcigxMCUyQyUyMDEwKSUwQSUwQSUyMCUyMCUyMCUyMGRlZiUyMGZvcndhcmQoc2VsZiUyQyUyMHgpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwc2VsZi5sYXllcih4KSUwQSUwQW1vZGVsJTIwJTNEJTIwTXlNb2RlbCgpJTBBbW9kZWwlMjAlM0QlMjBERFAobW9kZWwlMkMlMjBkZXZpY2VfaWRzJTNEJTVCZGV2aWNlX2lkJTVEKSUwQW1vZGVsLnJlZ2lzdGVyX2NvbW1faG9vayhzdGF0ZSUzRE5vbmUlMkMlMjBob29rJTNEZGVmYXVsdF9ob29rcy5iZjE2X2NvbXByZXNzX2hvb2spJTBBJTBBJTIzJTIwVHJhaW5pbmclMjBsb29wJTBBZm9yJTIwZGF0YSUyQyUyMHRhcmdldHMlMjBpbiUyMGRhdGFfbG9hZGVyJTNBJTBBJTIwJTIwJTIwJTIwb3V0cHV0cyUyMCUzRCUyMG1vZGVsKGRhdGEpJTBBJTIwJTIwJTIwJTIwbG9zcyUyMCUzRCUyMGNyaXRlcmlvbihvdXRwdXRzJTJDJTIwdGFyZ2V0cyklMEElMjAlMjAlMjAlMjBsb3NzLmJhY2t3YXJkKCklMEElMjAlMjAlMjAlMjBvcHRpbWl6ZXIuc3RlcCgpJTBBJTIwJTIwJTIwJTIwb3B0aW1pemVyLnplcm9fZ3JhZCgp",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> torch.nn.parallel <span class="hljs-keyword">import</span> DistributedDataParallel <span class="hljs-keyword">as</span> DDP
<span class="hljs-keyword">from</span> torch.distributed.algorithms.ddp_comm_hooks <span class="hljs-keyword">import</span> default_hooks
<span class="hljs-keyword">from</span> accelerate.test_utils.testing <span class="hljs-keyword">import</span> get_backend
device_type, _, _ = get_backend()
device_id = <span class="hljs-built_in">getattr</span>(torch, device_type, torch.cuda).current_device()
<span class="hljs-keyword">class</span> <span class="hljs-title class_">MyModel</span>(torch.nn.Module):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>):
<span class="hljs-built_in">super</span>().__init__()
self.layer = torch.nn.Linear(<span class="hljs-number">10</span>, <span class="hljs-number">10</span>)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, x</span>):
<span class="hljs-keyword">return</span> self.layer(x)
model = MyModel()
model = DDP(model, device_ids=[device_id])
model.register_comm_hook(state=<span class="hljs-literal">None</span>, hook=default_hooks.bf16_compress_hook)
<span class="hljs-comment"># Training loop</span>
<span class="hljs-keyword">for</span> data, targets <span class="hljs-keyword">in</span> data_loader:
outputs = model(data)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
optimizer.zero_grad()`,wrap:!1}}),{c(){d(a.$$.fragment)},l(l){m(a.$$.fragment,l)},m(l,r){y(a,l,r),o=!0},p:R,i(l){o||(M(a.$$.fragment,l),o=!0)},o(l){J(a.$$.fragment,l),o=!1},d(l){T(a,l)}}}function xs(h){let a,o;return a=new X({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUyQyUyMEREUENvbW11bmljYXRpb25Ib29rVHlwZSUyQyUyMERpc3RyaWJ1dGVkRGF0YVBhcmFsbGVsS3dhcmdzJTBBaW1wb3J0JTIwdG9yY2glMEElMEFjbGFzcyUyME15TW9kZWwodG9yY2gubm4uTW9kdWxlKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKHNlbGYpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3VwZXIoKS5fX2luaXRfXygpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2VsZi5sYXllciUyMCUzRCUyMHRvcmNoLm5uLkxpbmVhcigxMCUyQyUyMDEwKSUwQSUwQSUyMCUyMCUyMCUyMGRlZiUyMGZvcndhcmQoc2VsZiUyQyUyMHgpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwc2VsZi5sYXllcih4KSUwQSUwQSUyMyUyMEREUCUyMENvbW11bmljYXRpb24lMjBIb29rJTIwc2V0dXAlMEFkZHBfa3dhcmdzJTIwJTNEJTIwRGlzdHJpYnV0ZWREYXRhUGFyYWxsZWxLd2FyZ3MoY29tbV9ob29rJTNERERQQ29tbXVuaWNhdGlvbkhvb2tUeXBlLkJGMTYpJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcihrd2FyZ3NfaGFuZGxlcnMlM0QlNUJkZHBfa3dhcmdzJTVEKSUwQSUwQW1vZGVsJTIwJTNEJTIwTXlNb2RlbCgpJTBBb3B0aW1pemVyJTIwJTNEJTIwdG9yY2gub3B0aW0uQWRhbShtb2RlbC5wYXJhbWV0ZXJzKCkpJTBBZGF0YV9sb2FkZXIlMjAlM0QlMjBEYXRhTG9hZGVyKGRhdGFzZXQlMkMlMjBiYXRjaF9zaXplJTNEMTYpJTBBJTBBbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBkYXRhX2xvYWRlciUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUobW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBkYXRhX2xvYWRlciklMEElMEElMjMlMjBUcmFpbmluZyUyMGxvb3AlMEFmb3IlMjBkYXRhJTJDJTIwdGFyZ2V0cyUyMGluJTIwZGF0YV9sb2FkZXIlM0ElMEElMjAlMjAlMjAlMjBvdXRwdXRzJTIwJTNEJTIwbW9kZWwoZGF0YSklMEElMjAlMjAlMjAlMjBsb3NzJTIwJTNEJTIwY3JpdGVyaW9uKG91dHB1dHMlMkMlMjB0YXJnZXRzKSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3MpJTBBJTIwJTIwJTIwJTIwb3B0aW1pemVyLnN0ZXAoKSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKQ==",highlighted:`<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator, DDPCommunicationHookType, DistributedDataParallelKwargs
<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">class</span> <span class="hljs-title class_">MyModel</span>(torch.nn.Module):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>):
<span class="hljs-built_in">super</span>().__init__()
self.layer = torch.nn.Linear(<span class="hljs-number">10</span>, <span class="hljs-number">10</span>)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, x</span>):
<span class="hljs-keyword">return</span> self.layer(x)
<span class="hljs-comment"># DDP Communication Hook setup</span>
ddp_kwargs = DistributedDataParallelKwargs(comm_hook=DDPCommunicationHookType.BF16)
accelerator = Accelerator(kwargs_handlers=[ddp_kwargs])
model = MyModel()
optimizer = torch.optim.Adam(model.parameters())
data_loader = DataLoader(dataset, batch_size=<span class="hljs-number">16</span>)
model, optimizer, data_loader = accelerator.prepare(model, optimizer, data_loader)
<span class="hljs-comment"># Training loop</span>
<span class="hljs-keyword">for</span> data, targets <span class="hljs-keyword">in</span> data_loader:
outputs = model(data)
loss = criterion(outputs, targets)
accelerator.backward(loss)
optimizer.step()
optimizer.zero_grad()`,wrap:!1}}),{c(){d(a.$$.fragment)},l(l){m(a.$$.fragment,l)},m(l,r){y(a,l,r),o=!0},p:R,i(l){o||(M(a.$$.fragment,l),o=!0)},o(l){J(a.$$.fragment,l),o=!1},d(l){T(a,l)}}}function Ks(h){let a,o,l,r;return a=new K({props:{id:"bf16",option:"PyTorch",$$slots:{default:[Ds]},$$scope:{ctx:h}}}),l=new K({props:{id:"bf16",option:"Accelerate",$$slots:{default:[xs]},$$scope:{ctx:h}}}),{c(){d(a.$$.fragment),o=i(),d(l.$$.fragment)},l(e){m(a.$$.fragment,e),o=w(e),m(l.$$.fragment,e)},m(e,c){y(a,e,c),p(e,o,c),y(l,e,c),r=!0},p(e,c){const U={};c&2&&(U.$$scope={dirty:c,ctx:e}),a.$set(U);const f={};c&2&&(f.$$scope={dirty:c,ctx:e}),l.$set(f)},i(e){r||(M(a.$$.fragment,e),M(l.$$.fragment,e),r=!0)},o(e){J(a.$$.fragment,e),J(l.$$.fragment,e),r=!1},d(e){e&&n(o),T(a,e),T(l,e)}}}function As(h){let a,o="PowerSGD typically requires extra memory of the same size as the model’s gradients to enable error feedback, which can compensate for biased compressed communication and improve accuracy.";return{c(){a=b("p"),a.textContent=o},l(l){a=u(l,"P",{"data-svelte-h":!0}),I(a)!=="svelte-1f2etf0"&&(a.textContent=o)},m(l,r){p(l,a,r)},p:R,d(l){l&&n(a)}}}function Ps(h){let a,o;return a=new X({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdG9yY2gubm4ucGFyYWxsZWwlMjBpbXBvcnQlMjBEaXN0cmlidXRlZERhdGFQYXJhbGxlbCUyMGFzJTIwRERQJTBBZnJvbSUyMHRvcmNoLmRpc3RyaWJ1dGVkLmFsZ29yaXRobXMuZGRwX2NvbW1faG9va3MlMjBpbXBvcnQlMjBwb3dlclNHRF9ob29rJTBBZnJvbSUyMGFjY2VsZXJhdGUudGVzdF91dGlscy50ZXN0aW5nJTIwaW1wb3J0JTIwZ2V0X2JhY2tlbmQlMEElMEFkZXZpY2VfdHlwZSUyQyUyMF8lMkMlMjBfJTIwJTNEJTIwZ2V0X2JhY2tlbmQoKSUwQWRldmljZV9pZCUyMCUzRCUyMGdldGF0dHIodG9yY2glMkMlMjBkZXZpY2VfdHlwZSUyQyUyMHRvcmNoLmN1ZGEpLmN1cnJlbnRfZGV2aWNlKCklMEElMEFjbGFzcyUyME15TW9kZWwodG9yY2gubm4uTW9kdWxlKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKHNlbGYpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3VwZXIoKS5fX2luaXRfXygpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2VsZi5sYXllciUyMCUzRCUyMHRvcmNoLm5uLkxpbmVhcigxMCUyQyUyMDEwKSUwQSUwQSUyMCUyMCUyMCUyMGRlZiUyMGZvcndhcmQoc2VsZiUyQyUyMHgpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwc2VsZi5sYXllcih4KSUwQSUwQW1vZGVsJTIwJTNEJTIwTXlNb2RlbCgpJTBBbW9kZWwlMjAlM0QlMjBERFAobW9kZWwlMkMlMjBkZXZpY2VfaWRzJTNEJTVCZGV2aWNlX2lkJTVEKSUwQXN0YXRlJTIwJTNEJTIwcG93ZXJTR0RfaG9vay5Qb3dlclNHRFN0YXRlKHByb2Nlc3NfZ3JvdXAlM0ROb25lKSUwQW1vZGVsLnJlZ2lzdGVyX2NvbW1faG9vayhzdGF0ZSUzRHN0YXRlJTJDJTIwaG9vayUzRHBvd2VyU0dEX2hvb2sucG93ZXJTR0RfaG9vayklMEElMEElMjMlMjBUcmFpbmluZyUyMGxvb3AlMEFmb3IlMjBkYXRhJTJDJTIwdGFyZ2V0cyUyMGluJTIwZGF0YV9sb2FkZXIlM0ElMEElMjAlMjAlMjAlMjBvdXRwdXRzJTIwJTNEJTIwbW9kZWwoZGF0YSklMEElMjAlMjAlMjAlMjBsb3NzJTIwJTNEJTIwY3JpdGVyaW9uKG91dHB1dHMlMkMlMjB0YXJnZXRzKSUwQSUyMCUyMCUyMCUyMGxvc3MuYmFja3dhcmQoKSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci5zdGVwKCklMEElMjAlMjAlMjAlMjBvcHRpbWl6ZXIuemVyb19ncmFkKCk=",highlighted:`<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> torch.nn.parallel <span class="hljs-keyword">import</span> DistributedDataParallel <span class="hljs-keyword">as</span> DDP
<span class="hljs-keyword">from</span> torch.distributed.algorithms.ddp_comm_hooks <span class="hljs-keyword">import</span> powerSGD_hook
<span class="hljs-keyword">from</span> accelerate.test_utils.testing <span class="hljs-keyword">import</span> get_backend
device_type, _, _ = get_backend()
device_id = <span class="hljs-built_in">getattr</span>(torch, device_type, torch.cuda).current_device()
<span class="hljs-keyword">class</span> <span class="hljs-title class_">MyModel</span>(torch.nn.Module):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>):
<span class="hljs-built_in">super</span>().__init__()
self.layer = torch.nn.Linear(<span class="hljs-number">10</span>, <span class="hljs-number">10</span>)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, x</span>):
<span class="hljs-keyword">return</span> self.layer(x)
model = MyModel()
model = DDP(model, device_ids=[device_id])
state = powerSGD_hook.PowerSGDState(process_group=<span class="hljs-literal">None</span>)
model.register_comm_hook(state=state, hook=powerSGD_hook.powerSGD_hook)
<span class="hljs-comment"># Training loop</span>
<span class="hljs-keyword">for</span> data, targets <span class="hljs-keyword">in</span> data_loader:
outputs = model(data)
loss = criterion(outputs, targets)
loss.backward()
optimizer.step()
optimizer.zero_grad()`,wrap:!1}}),{c(){d(a.$$.fragment)},l(l){m(a.$$.fragment,l)},m(l,r){y(a,l,r),o=!0},p:R,i(l){o||(M(a.$$.fragment,l),o=!0)},o(l){J(a.$$.fragment,l),o=!1},d(l){T(a,l)}}}function Ls(h){let a,o;return a=new X({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUyQyUyMEREUENvbW11bmljYXRpb25Ib29rVHlwZSUyQyUyMERpc3RyaWJ1dGVkRGF0YVBhcmFsbGVsS3dhcmdzJTBBaW1wb3J0JTIwdG9yY2glMEElMEFjbGFzcyUyME15TW9kZWwodG9yY2gubm4uTW9kdWxlKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKHNlbGYpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3VwZXIoKS5fX2luaXRfXygpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2VsZi5sYXllciUyMCUzRCUyMHRvcmNoLm5uLkxpbmVhcigxMCUyQyUyMDEwKSUwQSUwQSUyMCUyMCUyMCUyMGRlZiUyMGZvcndhcmQoc2VsZiUyQyUyMHgpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwc2VsZi5sYXllcih4KSUwQSUwQSUyMyUyMEREUCUyMENvbW11bmljYXRpb24lMjBIb29rJTIwc2V0dXAlMEFkZHBfa3dhcmdzJTIwJTNEJTIwRGlzdHJpYnV0ZWREYXRhUGFyYWxsZWxLd2FyZ3MoY29tbV9ob29rJTNERERQQ29tbXVuaWNhdGlvbkhvb2tUeXBlLlBPV0VSX1NHRCklMEFhY2NlbGVyYXRvciUyMCUzRCUyMEFjY2VsZXJhdG9yKGt3YXJnc19oYW5kbGVycyUzRCU1QmRkcF9rd2FyZ3MlNUQpJTBBJTBBbW9kZWwlMjAlM0QlMjBNeU1vZGVsKCklMEFvcHRpbWl6ZXIlMjAlM0QlMjB0b3JjaC5vcHRpbS5BZGFtKG1vZGVsLnBhcmFtZXRlcnMoKSklMEFkYXRhX2xvYWRlciUyMCUzRCUyMERhdGFMb2FkZXIoZGF0YXNldCUyQyUyMGJhdGNoX3NpemUlM0QxNiklMEElMEFtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMGRhdGFfbG9hZGVyJTIwJTNEJTIwYWNjZWxlcmF0b3IucHJlcGFyZShtb2RlbCUyQyUyMG9wdGltaXplciUyQyUyMGRhdGFfbG9hZGVyKSUwQSUwQSUyMyUyMFRyYWluaW5nJTIwbG9vcCUwQWZvciUyMGRhdGElMkMlMjB0YXJnZXRzJTIwaW4lMjBkYXRhX2xvYWRlciUzQSUwQSUyMCUyMCUyMCUyMG91dHB1dHMlMjAlM0QlMjBtb2RlbChkYXRhKSUwQSUyMCUyMCUyMCUyMGxvc3MlMjAlM0QlMjBjcml0ZXJpb24ob3V0cHV0cyUyQyUyMHRhcmdldHMpJTBBJTIwJTIwJTIwJTIwYWNjZWxlcmF0b3IuYmFja3dhcmQobG9zcyklMEElMjAlMjAlMjAlMjBvcHRpbWl6ZXIuc3RlcCgpJTBBJTIwJTIwJTIwJTIwb3B0aW1pemVyLnplcm9fZ3JhZCgp",highlighted:`<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator, DDPCommunicationHookType, DistributedDataParallelKwargs
<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">class</span> <span class="hljs-title class_">MyModel</span>(torch.nn.Module):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>):
<span class="hljs-built_in">super</span>().__init__()
self.layer = torch.nn.Linear(<span class="hljs-number">10</span>, <span class="hljs-number">10</span>)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, x</span>):
<span class="hljs-keyword">return</span> self.layer(x)
<span class="hljs-comment"># DDP Communication Hook setup</span>
ddp_kwargs = DistributedDataParallelKwargs(comm_hook=DDPCommunicationHookType.POWER_SGD)
accelerator = Accelerator(kwargs_handlers=[ddp_kwargs])
model = MyModel()
optimizer = torch.optim.Adam(model.parameters())
data_loader = DataLoader(dataset, batch_size=<span class="hljs-number">16</span>)
model, optimizer, data_loader = accelerator.prepare(model, optimizer, data_loader)
<span class="hljs-comment"># Training loop</span>
<span class="hljs-keyword">for</span> data, targets <span class="hljs-keyword">in</span> data_loader:
outputs = model(data)
loss = criterion(outputs, targets)
accelerator.backward(loss)
optimizer.step()
optimizer.zero_grad()`,wrap:!1}}),{c(){d(a.$$.fragment)},l(l){m(a.$$.fragment,l)},m(l,r){y(a,l,r),o=!0},p:R,i(l){o||(M(a.$$.fragment,l),o=!0)},o(l){J(a.$$.fragment,l),o=!1},d(l){T(a,l)}}}function qs(h){let a,o,l,r;return a=new K({props:{id:"powerSGD",option:"PyTorch",$$slots:{default:[Ps]},$$scope:{ctx:h}}}),l=new K({props:{id:"powerSGD",option:"Accelerate",$$slots:{default:[Ls]},$$scope:{ctx:h}}}),{c(){d(a.$$.fragment),o=i(),d(l.$$.fragment)},l(e){m(a.$$.fragment,e),o=w(e),m(l.$$.fragment,e)},m(e,c){y(a,e,c),p(e,o,c),y(l,e,c),r=!0},p(e,c){const U={};c&2&&(U.$$scope={dirty:c,ctx:e}),a.$set(U);const f={};c&2&&(f.$$scope={dirty:c,ctx:e}),l.$set(f)},i(e){r||(M(a.$$.fragment,e),M(l.$$.fragment,e),r=!0)},o(e){J(a.$$.fragment,e),J(l.$$.fragment,e),r=!1},d(e){e&&n(o),T(a,e),T(l,e)}}}function Os(h){let a,o,l,r,e,c,U,f,_,Us="Distributed Data Parallel (DDP) communication hooks provide a generic interface to control how gradients are communicated across workers by overriding the vanilla allreduce in <code>DistributedDataParallel</code>. A few built-in communication hooks are provided, and users can easily apply any of these hooks to optimize communication.",P,$,fs="<li><strong>FP16 Compression Hook</strong>: Compresses gradients by casting them to half-precision floating-point format (<code>torch.float16</code>), reducing communication overhead.</li> <li><strong>BF16 Compression Hook</strong>: Similar to FP16, but uses the Brain Floating Point format (<code>torch.bfloat16</code>), which can be more efficient on certain hardware.</li> <li><strong>PowerSGD Hook</strong>: An advanced gradient compression algorithm that provides high compression rates and can accelerate bandwidth-bound distributed training.</li>",L,g,bs="In this tutorial, you will see how to quickly set up DDP communication hooks and perform training with the utilities provided in Accelerate, which can be as simple as adding just one new line of code! This demonstrates how to use DDP communication hooks to optimize gradient communication in distributed training with the Accelerate library.",q,E,O,j,ss,C,ls,k,as,G,es,W,ts,B,os,Z,ns,V,ps,Y,us="There are two additional utilities for supporting optional functionalities with the communication hooks.",rs,v,cs,F,Is="<code>comm_wrapper</code> is an option to wrap a communication hook with additional functionality. For example, it can be used to combine FP16 compression with other communication strategies. Currently supported wrappers are <code>no</code>, <code>fp16</code>, and <code>bf16</code>.",is,Q,ws,N,ds,S,js="<code>comm_state_option</code> allows you to pass additional state information required by certain communication hooks. This is particularly useful for stateful hooks like <code>PowerSGD</code>, which require maintaining hyperparameters and internal states across training steps. Below is an example showcasing the use of <code>comm_state_option</code> with the <code>PowerSGD</code> hook.",ms,z,ys,H,ks='For more advanced usage and additional hooks, refer to the <a href="https://pytorch.org/docs/stable/ddp_comm_hooks.html" rel="nofollow">PyTorch DDP Communication Hooks documentation</a>.',Ms,D,Js,A,Ts;return e=new Fs({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),U=new x({props:{title:"DDP Communication Hooks",local:"ddp-communication-hooks",headingTag:"h1"}}),E=new x({props:{title:"FP16 Compression Hook",local:"fp16-compression-hook",headingTag:"h2"}}),j=new hs({props:{id:"fp16",options:["PyTorch","Accelerate"],$$slots:{default:[zs]},$$scope:{ctx:h}}}),C=new x({props:{title:"BF16 Compression Hook",local:"bf16-compression-hook",headingTag:"h3"}}),k=new gs({props:{warning:!0,$$slots:{default:[Hs]},$$scope:{ctx:h}}}),G=new hs({props:{id:"bf16",options:["PyTorch","Accelerate"],$$slots:{default:[Ks]},$$scope:{ctx:h}}}),W=new x({props:{title:"PowerSGD Hook",local:"powersgd-hook",headingTag:"h3"}}),B=new gs({props:{warning:!0,$$slots:{default:[As]},$$scope:{ctx:h}}}),Z=new hs({props:{id:"powerSGD",options:["PyTorch","Accelerate"],$$slots:{default:[qs]},$$scope:{ctx:h}}}),V=new x({props:{title:"DDP Communication Hooks utilities",local:"ddp-communication-hooks-utilities",headingTag:"h2"}}),v=new x({props:{title:"comm_wrapper",local:"commwrapper",headingTag:"h3"}}),Q=new X({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUyQyUyMEREUENvbW11bmljYXRpb25Ib29rVHlwZSUyQyUyMERpc3RyaWJ1dGVkRGF0YVBhcmFsbGVsS3dhcmdzJTBBaW1wb3J0JTIwdG9yY2glMEElMEFjbGFzcyUyME15TW9kZWwodG9yY2gubm4uTW9kdWxlKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKHNlbGYpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3VwZXIoKS5fX2luaXRfXygpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2VsZi5sYXllciUyMCUzRCUyMHRvcmNoLm5uLkxpbmVhcigxMCUyQyUyMDEwKSUwQSUwQSUyMCUyMCUyMCUyMGRlZiUyMGZvcndhcmQoc2VsZiUyQyUyMHgpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwc2VsZi5sYXllcih4KSUwQSUwQSUyMyUyMEREUCUyMENvbW11bmljYXRpb24lMjBIb29rJTIwc2V0dXAlMEFkZHBfa3dhcmdzJTIwJTNEJTIwRGlzdHJpYnV0ZWREYXRhUGFyYWxsZWxLd2FyZ3MoJTBBJTIwJTIwJTIwJTIwY29tbV9ob29rJTNERERQQ29tbXVuaWNhdGlvbkhvb2tUeXBlLlBPV0VSX1NHRCUyQyUwQSUyMCUyMCUyMCUyMGNvbW1fd3JhcHBlciUzREREUENvbW11bmljYXRpb25Ib29rVHlwZS5GUDE2JTBBKSUwQWFjY2VsZXJhdG9yJTIwJTNEJTIwQWNjZWxlcmF0b3Ioa3dhcmdzX2hhbmRsZXJzJTNEJTVCZGRwX2t3YXJncyU1RCklMEElMEFtb2RlbCUyMCUzRCUyME15TW9kZWwoKSUwQW9wdGltaXplciUyMCUzRCUyMHRvcmNoLm9wdGltLkFkYW0obW9kZWwucGFyYW1ldGVycygpKSUwQWRhdGFfbG9hZGVyJTIwJTNEJTIwRGF0YUxvYWRlcihkYXRhc2V0JTJDJTIwYmF0Y2hfc2l6ZSUzRDE2KSUwQSUwQW1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwZGF0YV9sb2FkZXIlMjAlM0QlMjBhY2NlbGVyYXRvci5wcmVwYXJlKG1vZGVsJTJDJTIwb3B0aW1pemVyJTJDJTIwZGF0YV9sb2FkZXIpJTBBJTBBJTIzJTIwVHJhaW5pbmclMjBsb29wJTBBZm9yJTIwZGF0YSUyQyUyMHRhcmdldHMlMjBpbiUyMGRhdGFfbG9hZGVyJTNBJTBBJTIwJTIwJTIwJTIwb3V0cHV0cyUyMCUzRCUyMG1vZGVsKGRhdGEpJTBBJTIwJTIwJTIwJTIwbG9zcyUyMCUzRCUyMGNyaXRlcmlvbihvdXRwdXRzJTJDJTIwdGFyZ2V0cyklMEElMjAlMjAlMjAlMjBhY2NlbGVyYXRvci5iYWNrd2FyZChsb3NzKSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci5zdGVwKCklMEElMjAlMjAlMjAlMjBvcHRpbWl6ZXIuemVyb19ncmFkKCk=",highlighted:`<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator, DDPCommunicationHookType, DistributedDataParallelKwargs
<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">class</span> <span class="hljs-title class_">MyModel</span>(torch.nn.Module):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>):
<span class="hljs-built_in">super</span>().__init__()
self.layer = torch.nn.Linear(<span class="hljs-number">10</span>, <span class="hljs-number">10</span>)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, x</span>):
<span class="hljs-keyword">return</span> self.layer(x)
<span class="hljs-comment"># DDP Communication Hook setup</span>
ddp_kwargs = DistributedDataParallelKwargs(
comm_hook=DDPCommunicationHookType.POWER_SGD,
comm_wrapper=DDPCommunicationHookType.FP16
)
accelerator = Accelerator(kwargs_handlers=[ddp_kwargs])
model = MyModel()
optimizer = torch.optim.Adam(model.parameters())
data_loader = DataLoader(dataset, batch_size=<span class="hljs-number">16</span>)
model, optimizer, data_loader = accelerator.prepare(model, optimizer, data_loader)
<span class="hljs-comment"># Training loop</span>
<span class="hljs-keyword">for</span> data, targets <span class="hljs-keyword">in</span> data_loader:
outputs = model(data)
loss = criterion(outputs, targets)
accelerator.backward(loss)
optimizer.step()
optimizer.zero_grad()`,wrap:!1}}),N=new x({props:{title:"comm_state_option",local:"commstateoption",headingTag:"h3"}}),z=new X({props:{code:"ZnJvbSUyMGFjY2VsZXJhdGUlMjBpbXBvcnQlMjBBY2NlbGVyYXRvciUyQyUyMEREUENvbW11bmljYXRpb25Ib29rVHlwZSUyQyUyMERpc3RyaWJ1dGVkRGF0YVBhcmFsbGVsS3dhcmdzJTBBaW1wb3J0JTIwdG9yY2glMEElMEFjbGFzcyUyME15TW9kZWwodG9yY2gubm4uTW9kdWxlKSUzQSUwQSUyMCUyMCUyMCUyMGRlZiUyMF9faW5pdF9fKHNlbGYpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3VwZXIoKS5fX2luaXRfXygpJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc2VsZi5sYXllciUyMCUzRCUyMHRvcmNoLm5uLkxpbmVhcigxMCUyQyUyMDEwKSUwQSUwQSUyMCUyMCUyMCUyMGRlZiUyMGZvcndhcmQoc2VsZiUyQyUyMHgpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwc2VsZi5sYXllcih4KSUwQSUwQSUyMyUyMEREUCUyMENvbW11bmljYXRpb24lMjBIb29rJTIwc2V0dXAlMEFkZHBfa3dhcmdzJTIwJTNEJTIwRGlzdHJpYnV0ZWREYXRhUGFyYWxsZWxLd2FyZ3MoJTBBJTIwJTIwJTIwJTIwY29tbV9ob29rJTNERERQQ29tbXVuaWNhdGlvbkhvb2tUeXBlLlBPV0VSX1NHRCUyQyUwQSUyMCUyMCUyMCUyMGNvbW1fc3RhdGVfb3B0aW9uJTNEJTdCJTIybWF0cml4X2FwcHJveGltYXRpb25fcmFuayUyMiUzQSUyMDIlN0QlMEEpJTBBYWNjZWxlcmF0b3IlMjAlM0QlMjBBY2NlbGVyYXRvcihrd2FyZ3NfaGFuZGxlcnMlM0QlNUJkZHBfa3dhcmdzJTVEKSUwQSUwQW1vZGVsJTIwJTNEJTIwTXlNb2RlbCgpJTBBb3B0aW1pemVyJTIwJTNEJTIwdG9yY2gub3B0aW0uQWRhbShtb2RlbC5wYXJhbWV0ZXJzKCkpJTBBZGF0YV9sb2FkZXIlMjAlM0QlMjBEYXRhTG9hZGVyKGRhdGFzZXQlMkMlMjBiYXRjaF9zaXplJTNEMTYpJTBBJTBBbW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBkYXRhX2xvYWRlciUyMCUzRCUyMGFjY2VsZXJhdG9yLnByZXBhcmUobW9kZWwlMkMlMjBvcHRpbWl6ZXIlMkMlMjBkYXRhX2xvYWRlciklMEElMEElMjMlMjBUcmFpbmluZyUyMGxvb3AlMEFmb3IlMjBkYXRhJTJDJTIwdGFyZ2V0cyUyMGluJTIwZGF0YV9sb2FkZXIlM0ElMEElMjAlMjAlMjAlMjBvdXRwdXRzJTIwJTNEJTIwbW9kZWwoZGF0YSklMEElMjAlMjAlMjAlMjBsb3NzJTIwJTNEJTIwY3JpdGVyaW9uKG91dHB1dHMlMkMlMjB0YXJnZXRzKSUwQSUyMCUyMCUyMCUyMGFjY2VsZXJhdG9yLmJhY2t3YXJkKGxvc3MpJTBBJTIwJTIwJTIwJTIwb3B0aW1pemVyLnN0ZXAoKSUwQSUyMCUyMCUyMCUyMG9wdGltaXplci56ZXJvX2dyYWQoKQ==",highlighted:`<span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator, DDPCommunicationHookType, DistributedDataParallelKwargs
<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">class</span> <span class="hljs-title class_">MyModel</span>(torch.nn.Module):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self</span>):
<span class="hljs-built_in">super</span>().__init__()
self.layer = torch.nn.Linear(<span class="hljs-number">10</span>, <span class="hljs-number">10</span>)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, x</span>):
<span class="hljs-keyword">return</span> self.layer(x)
<span class="hljs-comment"># DDP Communication Hook setup</span>
ddp_kwargs = DistributedDataParallelKwargs(
comm_hook=DDPCommunicationHookType.POWER_SGD,
comm_state_option={<span class="hljs-string">&quot;matrix_approximation_rank&quot;</span>: <span class="hljs-number">2</span>}
)
accelerator = Accelerator(kwargs_handlers=[ddp_kwargs])
model = MyModel()
optimizer = torch.optim.Adam(model.parameters())
data_loader = DataLoader(dataset, batch_size=<span class="hljs-number">16</span>)
model, optimizer, data_loader = accelerator.prepare(model, optimizer, data_loader)
<span class="hljs-comment"># Training loop</span>
<span class="hljs-keyword">for</span> data, targets <span class="hljs-keyword">in</span> data_loader:
outputs = model(data)
loss = criterion(outputs, targets)
accelerator.backward(loss)
optimizer.step()
optimizer.zero_grad()`,wrap:!1}}),D=new Qs({props:{source:"https://github.com/huggingface/accelerate/blob/main/docs/source/usage_guides/ddp_comm_hook.md"}}),{c(){a=b("meta"),o=i(),l=b("p"),r=i(),d(e.$$.fragment),c=i(),d(U.$$.fragment),f=i(),_=b("p"),_.innerHTML=Us,P=i(),$=b("ul"),$.innerHTML=fs,L=i(),g=b("p"),g.textContent=bs,q=i(),d(E.$$.fragment),O=i(),d(j.$$.fragment),ss=i(),d(C.$$.fragment),ls=i(),d(k.$$.fragment),as=i(),d(G.$$.fragment),es=i(),d(W.$$.fragment),ts=i(),d(B.$$.fragment),os=i(),d(Z.$$.fragment),ns=i(),d(V.$$.fragment),ps=i(),Y=b("p"),Y.textContent=us,rs=i(),d(v.$$.fragment),cs=i(),F=b("p"),F.innerHTML=Is,is=i(),d(Q.$$.fragment),ws=i(),d(N.$$.fragment),ds=i(),S=b("p"),S.innerHTML=js,ms=i(),d(z.$$.fragment),ys=i(),H=b("p"),H.innerHTML=ks,Ms=i(),d(D.$$.fragment),Js=i(),A=b("p"),this.h()},l(s){const t=Ys("svelte-u9bgzb",document.head);a=u(t,"META",{name:!0,content:!0}),t.forEach(n),o=w(s),l=u(s,"P",{}),_s(l).forEach(n),r=w(s),m(e.$$.fragment,s),c=w(s),m(U.$$.fragment,s),f=w(s),_=u(s,"P",{"data-svelte-h":!0}),I(_)!=="svelte-x6tyko"&&(_.innerHTML=Us),P=w(s),$=u(s,"UL",{"data-svelte-h":!0}),I($)!=="svelte-zxp3oz"&&($.innerHTML=fs),L=w(s),g=u(s,"P",{"data-svelte-h":!0}),I(g)!=="svelte-10ve3hd"&&(g.textContent=bs),q=w(s),m(E.$$.fragment,s),O=w(s),m(j.$$.fragment,s),ss=w(s),m(C.$$.fragment,s),ls=w(s),m(k.$$.fragment,s),as=w(s),m(G.$$.fragment,s),es=w(s),m(W.$$.fragment,s),ts=w(s),m(B.$$.fragment,s),os=w(s),m(Z.$$.fragment,s),ns=w(s),m(V.$$.fragment,s),ps=w(s),Y=u(s,"P",{"data-svelte-h":!0}),I(Y)!=="svelte-1nbipsx"&&(Y.textContent=us),rs=w(s),m(v.$$.fragment,s),cs=w(s),F=u(s,"P",{"data-svelte-h":!0}),I(F)!=="svelte-oi3r9p"&&(F.innerHTML=Is),is=w(s),m(Q.$$.fragment,s),ws=w(s),m(N.$$.fragment,s),ds=w(s),S=u(s,"P",{"data-svelte-h":!0}),I(S)!=="svelte-1igm2b3"&&(S.innerHTML=js),ms=w(s),m(z.$$.fragment,s),ys=w(s),H=u(s,"P",{"data-svelte-h":!0}),I(H)!=="svelte-1ll6vn"&&(H.innerHTML=ks),Ms=w(s),m(D.$$.fragment,s),Js=w(s),A=u(s,"P",{}),_s(A).forEach(n),this.h()},h(){$s(a,"name","hf:doc:metadata"),$s(a,"content",sl)},m(s,t){vs(document.head,a),p(s,o,t),p(s,l,t),p(s,r,t),y(e,s,t),p(s,c,t),y(U,s,t),p(s,f,t),p(s,_,t),p(s,P,t),p(s,$,t),p(s,L,t),p(s,g,t),p(s,q,t),y(E,s,t),p(s,O,t),y(j,s,t),p(s,ss,t),y(C,s,t),p(s,ls,t),y(k,s,t),p(s,as,t),y(G,s,t),p(s,es,t),y(W,s,t),p(s,ts,t),y(B,s,t),p(s,os,t),y(Z,s,t),p(s,ns,t),y(V,s,t),p(s,ps,t),p(s,Y,t),p(s,rs,t),y(v,s,t),p(s,cs,t),p(s,F,t),p(s,is,t),y(Q,s,t),p(s,ws,t),y(N,s,t),p(s,ds,t),p(s,S,t),p(s,ms,t),y(z,s,t),p(s,ys,t),p(s,H,t),p(s,Ms,t),y(D,s,t),p(s,Js,t),p(s,A,t),Ts=!0},p(s,[t]){const Gs={};t&2&&(Gs.$$scope={dirty:t,ctx:s}),j.$set(Gs);const Bs={};t&2&&(Bs.$$scope={dirty:t,ctx:s}),k.$set(Bs);const Zs={};t&2&&(Zs.$$scope={dirty:t,ctx:s}),G.$set(Zs);const Xs={};t&2&&(Xs.$$scope={dirty:t,ctx:s}),B.$set(Xs);const Rs={};t&2&&(Rs.$$scope={dirty:t,ctx:s}),Z.$set(Rs)},i(s){Ts||(M(e.$$.fragment,s),M(U.$$.fragment,s),M(E.$$.fragment,s),M(j.$$.fragment,s),M(C.$$.fragment,s),M(k.$$.fragment,s),M(G.$$.fragment,s),M(W.$$.fragment,s),M(B.$$.fragment,s),M(Z.$$.fragment,s),M(V.$$.fragment,s),M(v.$$.fragment,s),M(Q.$$.fragment,s),M(N.$$.fragment,s),M(z.$$.fragment,s),M(D.$$.fragment,s),Ts=!0)},o(s){J(e.$$.fragment,s),J(U.$$.fragment,s),J(E.$$.fragment,s),J(j.$$.fragment,s),J(C.$$.fragment,s),J(k.$$.fragment,s),J(G.$$.fragment,s),J(W.$$.fragment,s),J(B.$$.fragment,s),J(Z.$$.fragment,s),J(V.$$.fragment,s),J(v.$$.fragment,s),J(Q.$$.fragment,s),J(N.$$.fragment,s),J(z.$$.fragment,s),J(D.$$.fragment,s),Ts=!1},d(s){s&&(n(o),n(l),n(r),n(c),n(f),n(_),n(P),n($),n(L),n(g),n(q),n(O),n(ss),n(ls),n(as),n(es),n(ts),n(os),n(ns),n(ps),n(Y),n(rs),n(cs),n(F),n(is),n(ws),n(ds),n(S),n(ms),n(ys),n(H),n(Ms),n(Js),n(A)),n(a),T(e,s),T(U,s),T(E,s),T(j,s),T(C,s),T(k,s),T(G,s),T(W,s),T(B,s),T(Z,s),T(V,s),T(v,s),T(Q,s),T(N,s),T(z,s),T(D,s)}}}const sl='{"title":"DDP Communication Hooks","local":"ddp-communication-hooks","sections":[{"title":"FP16 Compression Hook","local":"fp16-compression-hook","sections":[{"title":"BF16 Compression Hook","local":"bf16-compression-hook","sections":[],"depth":3},{"title":"PowerSGD Hook","local":"powersgd-hook","sections":[],"depth":3}],"depth":2},{"title":"DDP Communication Hooks utilities","local":"ddp-communication-hooks-utilities","sections":[{"title":"comm_wrapper","local":"commwrapper","sections":[],"depth":3},{"title":"comm_state_option","local":"commstateoption","sections":[],"depth":3}],"depth":2}],"depth":1}';function ll(h){return Cs(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class rl extends Ws{constructor(a){super(),Vs(this,a,ll,Os,Es,{})}}export{rl as component};

Xet Storage Details

Size:
38.2 kB
·
Xet hash:
045377199f916de7c7bfe9b561ed7ce7f406d638575051383024676c133c4a60

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.