Buckets:

HuggingFaceDocBuilder's picture
download
raw
29.7 kB
/**
 * Minified, compiler-generated page module for the "Fully Sharded Data Parallel utilities"
 * reference page. Its only export is the class `Mt`, exported under the name `component`.
 *
 * NOTE(review): this looks like Svelte compiler output (single-letter runtime helpers imported
 * from ../chunks/index and ../chunks/scheduler). Text fixes to the documentation strings should
 * presumably be made in the upstream docstrings / fsdp.md and regenerated - confirm before
 * hand-editing the strings here.
 *
 * Two double-quoted string literals contained a raw line break (one inside the
 * `optim_state_dict_config` signature value, one inside a "docstring ..." class string), which
 * is a syntax error in JavaScript; they are joined back onto one line below.
 */
import{s as vt,n as wt,o as Pt}from"../chunks/scheduler.b9285784.js";import{S as xt,i as St,e as l,s as r,c as s,h as Dt,a as d,d as t,b as o,f as g,g as c,j as y,k as h,l as n,m as i,n as p,t as f,o as m,p as _}from"../chunks/index.26bc89a1.js";import{C as Ft,H as T,E as Tt}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.7a0ae628.js";import{D as v}from"../chunks/Docstring.3b3b5305.js";
/**
 * Fragment factory handed to `St` by the `Mt` constructor.
 *
 * Instantiates the child components used by the page (`Ft` once, `T` for every heading, `v` for
 * every docstring card, `Tt` once with the link to fsdp.md) and returns an object with the
 * methods `c`, `l`, `h`, `m`, `p`, `i`, `o` and `d`:
 *   - `c` builds the elements with `l(...)`/`r()` and ends by calling `this.h()`;
 *   - `l(e)` performs the same walk over the existing nodes of `e` via `d`/`g`/`o`/`c`, then
 *     calls `this.h()`;
 *   - `h` sets the `hf:doc:metadata` meta content to `kt` and the CSS class of each card div;
 *   - `m(e,a)` inserts everything into `e` (and the meta tag into `document.head`);
 *   - `p` is the imported `wt`;
 *   - `i`/`o` call `f`/`m` on every child fragment, guarded by the local flag `Me`;
 *   - `d(e)` removes the nodes (only when `e` is truthy, except the meta tag) and calls `_` on
 *     every child component.
 * NOTE(review): these presumably correspond to Svelte's create / claim / hydrate / mount /
 * update / intro / outro / destroy hooks - confirm against the runtime in ../chunks/index.
 *
 * @param {*} dt - Unused inside this function.
 * @returns {object} The fragment object described above.
 */
function Ct(dt){let w,ge,_e,he,U,be,O,ye,L,$e,P,M,je,te,st="Enables RAM efficient loading of Hugging Face models for FSDP in the environment.",ve,I,we,x,E,Ge,ae,ct="Disables RAM efficient loading of Hugging Face models for FSDP in the environment.",Pe,R,xe,$,A,Ke,re,pt=`Merge the weights from sharded FSDP model checkpoints into a single combined checkpoint. Should be used if
<code>SHARDED_STATE_DICT</code> was used for the model. Weights will be saved to <code>{output_path}/model.safetensors</code> if
<code>safe_serialization</code> else <code>pytorch_model.bin</code>.`,Je,oe,ft="Note: this is a CPU-bound process.",Se,H,De,u,V,Qe,ne,mt="This plugin is used to enable fully sharded data parallelism.",Xe,C,q,Ye,ie,_t=`Given <code>model</code>, creates an <code>auto_wrap_policy</code> based on the passed in policy and if we can use the
<code>transformer_cls_to_wrap</code>`,Ze,k,W,et,le,ut="Sets the mixed precision policy for FSDP",tt,N,B,at,de,gt="Set the state dict config based on the <code>StateDictType</code>.",rt,z,j,ot,se,ht="Validates the mixed precision policy, abstracted away to not bring in the imports if not needed.",Fe,G,Te,S,K,nt,ce,bt=`Loads the full state dict (could be only on rank 0) into the sharded model. This is done by broadcasting the
parameters from rank 0 to all other ranks. This function modifies the model in-place.`,Ce,J,ke,D,Q,it,pe,yt=`Switches the parameters of the optimizer to new ones (sharded parameters in usual case). This function modifies the
optimizer in-place.`,Ne,X,ze,F,Y,lt,fe,$t="Prepares the model for FSDP2 in-place. Also returns the model to avoid misuse of the original model.",Ue,Z,Oe,ee,Le,ue,Me;return U=new Ft({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),O=new T({props:{title:"Fully Sharded Data Parallel utilities",local:"fully-sharded-data-parallel-utilities",headingTag:"h1"}}),L=new T({props:{title:"enable_fsdp_ram_efficient_loading",local:"accelerate.utils.enable_fsdp_ram_efficient_loading",headingTag:"h2"}}),M=new v({props:{name:"accelerate.utils.enable_fsdp_ram_efficient_loading",anchor:"accelerate.utils.enable_fsdp_ram_efficient_loading",parameters:[],source:"https://github.com/huggingface/accelerate/blob/vr_4021/src/accelerate/utils/fsdp_utils.py#L39"}}),I=new T({props:{title:"disable_fsdp_ram_efficient_loading",local:"accelerate.utils.disable_fsdp_ram_efficient_loading",headingTag:"h2"}}),E=new v({props:{name:"accelerate.utils.disable_fsdp_ram_efficient_loading",anchor:"accelerate.utils.disable_fsdp_ram_efficient_loading",parameters:[],source:"https://github.com/huggingface/accelerate/blob/vr_4021/src/accelerate/utils/fsdp_utils.py#L49"}}),R=new T({props:{title:"merge_fsdp_weights",local:"accelerate.utils.merge_fsdp_weights",headingTag:"h2"}}),A=new v({props:{name:"accelerate.utils.merge_fsdp_weights",anchor:"accelerate.utils.merge_fsdp_weights",parameters:[{name:"checkpoint_dir",val:": str"},{name:"output_path",val:": str"},{name:"safe_serialization",val:": bool = True"},{name:"remove_checkpoint_dir",val:": bool = False"}],parametersDescription:[{anchor:"accelerate.utils.merge_fsdp_weights.checkpoint_dir",description:`<strong>checkpoint_dir</strong> (<code>str</code>) &#x2014;
The directory containing the FSDP checkpoints (can be either the model or optimizer).`,name:"checkpoint_dir"},{anchor:"accelerate.utils.merge_fsdp_weights.output_path",description:`<strong>output_path</strong> (<code>str</code>) &#x2014;
The path to save the merged checkpoint.`,name:"output_path"},{anchor:"accelerate.utils.merge_fsdp_weights.safe_serialization",description:`<strong>safe_serialization</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to save the merged weights with safetensors (recommended).`,name:"safe_serialization"},{anchor:"accelerate.utils.merge_fsdp_weights.remove_checkpoint_dir",description:`<strong>remove_checkpoint_dir</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Whether to remove the checkpoint directory after merging.`,name:"remove_checkpoint_dir"}],source:"https://github.com/huggingface/accelerate/blob/vr_4021/src/accelerate/utils/fsdp_utils.py#L366"}}),H=new T({props:{title:"FullyShardedDataParallelPlugin",local:"accelerate.FullyShardedDataParallelPlugin",headingTag:"h2"}}),V=new v({props:{name:"class accelerate.FullyShardedDataParallelPlugin",anchor:"accelerate.FullyShardedDataParallelPlugin",parameters:[{name:"fsdp_version",val:": int = None"},{name:"sharding_strategy",val:": typing.Union[str, ForwardRef('torch.distributed.fsdp.ShardingStrategy')] = None"},{name:"reshard_after_forward",val:": typing.Union[str, ForwardRef('torch.distributed.fsdp.ShardingStrategy'), bool] = None"},{name:"backward_prefetch",val:": typing.Union[str, ForwardRef('torch.distributed.fsdp.BackwardPrefetch'), NoneType] = None"},{name:"mixed_precision_policy",val:": typing.Union[dict, str, ForwardRef('torch.distributed.fsdp.MixedPrecision'), ForwardRef('torch.distributed.fsdp.MixedPrecisionPolicy'), NoneType] = None"},{name:"auto_wrap_policy",val:": typing.Union[typing.Callable, typing.Literal['transformer_based_wrap', 'size_based_wrap', 'no_wrap'], NoneType] = None"},{name:"cpu_offload",val:": typing.Union[bool, ForwardRef('torch.distributed.fsdp.CPUOffload'), ForwardRef('torch.distributed.fsdp.CPUOffloadPolicy')] = None"},{name:"ignored_modules",val:": typing.Union[collections.abc.Iterable[torch.nn.modules.module.Module], str, NoneType] = None"},{name:"state_dict_type",val:": typing.Union[str, ForwardRef('torch.distributed.fsdp.StateDictType')] = None"},{name:"state_dict_config",val:": typing.Union[ForwardRef('torch.distributed.fsdp.FullStateDictConfig'), ForwardRef('torch.distributed.fsdp.ShardedStateDictConfig'), NoneType] = None"},{name:"optim_state_dict_config",val:": typing.Union[ForwardRef('torch.distributed.fsdp.FullOptimStateDictConfig'), ForwardRef('torch.distributed.fsdp.ShardedOptimStateDictConfig'), NoneType] = None"},{name:"limit_all_gathers",val:": bool = True"},{name:"use_orig_params",val:": typing.Optional[bool] = None"},{name:"param_init_fn",val:": typing.Optional[typing.Callable[[torch.nn.modules.module.Module], NoneType]] = None"},{name:"sync_module_states",val:": typing.Optional[bool] = None"},{name:"forward_prefetch",val:": bool = None"},{name:"activation_checkpointing",val:": bool = None"},{name:"cpu_ram_efficient_loading",val:": bool = None"},{name:"transformer_cls_names_to_wrap",val:": typing.Optional[list[str]] = None"},{name:"min_num_params",val:": typing.Optional[int] = None"}],parametersDescription:[{anchor:"accelerate.FullyShardedDataParallelPlugin.fsdp_version",description:`<strong>fsdp_version</strong> (<code>int</code>, defaults to <code>1</code>) &#x2014;
The version of FSDP to use. Defaults to 1. If set to 2, launcher expects the config to be converted to
FSDP2 format.`,name:"fsdp_version"},{anchor:"accelerate.FullyShardedDataParallelPlugin.sharding_strategy",description:`<strong>sharding_strategy</strong> (<code>Union[str, torch.distributed.fsdp.ShardingStrategy]</code>, defaults to <code>&apos;FULL_SHARD&apos;</code>) &#x2014;
Sharding strategy to use. Should be either a <code>str</code> or an instance of
<code>torch.distributed.fsdp.fully_sharded_data_parallel.ShardingStrategy</code>. Is deprecated in favor of
<code>reshard_after_forward</code>.`,name:"sharding_strategy"},{anchor:"accelerate.FullyShardedDataParallelPlugin.reshard_after_forward",description:`<strong>reshard_after_forward</strong> (<code>Union[str, torch.distributed.fsdp.ShardingStrategy, bool]</code>, defaults to <code>&apos;FULL_SHARD&apos;</code> for <code>fsdp_version=1</code> and <code>True</code> for <code>fsdp_version=2</code>) &#x2014;
Sharding strategy to use. Should be a bool if <code>fsdp_version</code> is set to 2 else a <code>str</code> or an instance of
<code>torch.distributed.fsdp.fully_sharded_data_parallel.ShardingStrategy</code>.`,name:"reshard_after_forward"},{anchor:"accelerate.FullyShardedDataParallelPlugin.backward_prefetch",description:`<strong>backward_prefetch</strong> (<code>Union[str, torch.distributed.fsdp.BackwardPrefetch]</code>, defaults to <code>&apos;NO_PREFETCH&apos;</code>) &#x2014;
Backward prefetch strategy to use. Should be either a <code>str</code> or an instance of
<code>torch.distributed.fsdp.fully_sharded_data_parallel.BackwardPrefetch</code>.`,name:"backward_prefetch"},{anchor:"accelerate.FullyShardedDataParallelPlugin.mixed_precision_policy",description:`<strong>mixed_precision_policy</strong> (<code>Optional[Union[dict, str, torch.distributed.fsdp.MixedPrecision, torch.distributed.fsdp.MixedPrecisionPolicy]]</code>, defaults to <code>None</code>) &#x2014;
A config to enable mixed precision training with FullyShardedDataParallel. If passing in a <code>dict</code>, it
should have the following keys: <code>param_dtype</code>, <code>reduce_dtype</code>, and <code>buffer_dtype</code>, can be an instance of
<code>torch.distributed.fsdp.MixedPrecisionPolicy</code> if <code>fsdp_version</code> is set to 2. If passing in a <code>str</code>, it
should be one of the following values: fp8, fp16, bf16, fp32, and used to set <code>param_dtype</code>,
<code>reduce_dtype</code>, and <code>buffer_dtype</code>.`,name:"mixed_precision_policy"},{anchor:"accelerate.FullyShardedDataParallelPlugin.auto_wrap_policy",description:"<strong>auto_wrap_policy</strong> (<code>Optional(Union[Callable, Literal[&quot;transformer_based_wrap&quot;, &quot;size_based_wrap&quot;, &quot;no_wrap&quot;]]), defaults to </code>NO_WRAP<code>) -- A callable or string specifying a policy to recursively wrap layers with FSDP. If a string, it must be one of </code>transformer_based_wrap<code>, </code>size_based_wrap<code>, or </code>no_wrap<code>. See </code>torch.distributed.fsdp.wrap.size_based_wrap_policy` for a direction on what it should look like.",name:"auto_wrap_policy"},{anchor:"accelerate.FullyShardedDataParallelPlugin.cpu_offload",description:`<strong>cpu_offload</strong> (<code>Union[bool, torch.distributed.fsdp.CPUOffload, torch.distributed.fsdp.CPUOffloadPolicy]</code>, defaults to <code>False</code>) &#x2014;
Whether to offload parameters to CPU. Should be either a <code>bool</code> or an instance of
<code>torch.distributed.fsdp.fully_sharded_data_parallel.CPUOffload</code> or
<code>torch.distributed.fsdp.fully_sharded_data_parallel.CPUOffloadPolicy</code> if <code>fsdp_version</code> is set to 2.`,name:"cpu_offload"},{anchor:"accelerate.FullyShardedDataParallelPlugin.ignored_modules",description:`<strong>ignored_modules</strong> (<code>Optional[Union[Iterable[torch.nn.Module], str]]</code>, defaults to <code>None</code>) &#x2014;
A list of modules to ignore when wrapping with FSDP. When passing a string, will match the modules by name
using regex fullmatch. If <code>fsdp_version</code> is set to 2, the modules are converted to parameters and used.`,name:"ignored_modules"},{anchor:"accelerate.FullyShardedDataParallelPlugin.state_dict_type",description:`<strong>state_dict_type</strong> (<code>Union[str, torch.distributed.fsdp.StateDictType]</code>, defaults to <code>&apos;FULL_STATE_DICT&apos;</code>) &#x2014;
State dict type to use. If a string, it must be one of <code>full_state_dict</code>, <code>local_state_dict</code>, or
<code>sharded_state_dict</code>.`,name:"state_dict_type"},{anchor:"accelerate.FullyShardedDataParallelPlugin.state_dict_config",description:`<strong>state_dict_config</strong> (<code>Optional[Union[torch.distributed.fsdp.FullStateDictConfig, torch.distributed.fsdp.ShardedStateDictConfig]</code>, defaults to <code>None</code>) &#x2014;
State dict config to use. Is determined based on the <code>state_dict_type</code> if not passed in.`,name:"state_dict_config"},{anchor:"accelerate.FullyShardedDataParallelPlugin.optim_state_dict_config",description:`<strong>optim_state_dict_config</strong> (<code>Optional[Union[torch.distributed.fsdp.FullOptimStateDictConfig, torch.distributed.fsdp.ShardedOptimStateDictConfig]</code>, defaults to <code>None</code>) &#x2014;
Optim state dict config to use. Is determined based on the <code>state_dict_type</code> if not passed in.`,name:"optim_state_dict_config"},{anchor:"accelerate.FullyShardedDataParallelPlugin.limit_all_gathers",description:`<strong>limit_all_gathers</strong> (<code>bool</code>, defaults to <code>True</code>) &#x2014;
Whether to have FSDP explicitly synchronizes the CPU thread to prevent too many in-flight all-gathers. This
bool only affects the sharded strategies that schedule all-gathers. Enabling this can help lower the number
of CUDA malloc retries.`,name:"limit_all_gathers"},{anchor:"accelerate.FullyShardedDataParallelPlugin.use_orig_params",description:`<strong>use_orig_params</strong> (<code>bool</code>, defaults to <code>False</code>) &#x2014;
Whether to use the original parameters for the optimizer.`,name:"use_orig_params"},{anchor:"accelerate.FullyShardedDataParallelPlugin.param_init_fn",description:`<strong>param_init_fn</strong> (<code>Optional[Callable[[torch.nn.Module], None]</code>, defaults to <code>None</code>) &#x2014;
A <code>Callable[torch.nn.Module] -&gt; None</code> that specifies how modules that are currently on the meta device
should be initialized onto an actual device. Only applicable when <code>sync_module_states</code> is <code>True</code>. By
default is a <code>lambda</code> which calls <code>to_empty</code> on the module.`,name:"param_init_fn"},{anchor:"accelerate.FullyShardedDataParallelPlugin.sync_module_states",description:`<strong>sync_module_states</strong> (<code>bool</code>, defaults to <code>False</code>) &#x2014;
Whether each individually wrapped FSDP unit should broadcast module parameters from rank 0 to ensure they
are the same across all ranks after initialization. Defaults to <code>False</code> unless <code>cpu_ram_efficient_loading</code>
is <code>True</code>, then will be forcibly enabled.`,name:"sync_module_states"},{anchor:"accelerate.FullyShardedDataParallelPlugin.forward_prefetch",description:`<strong>forward_prefetch</strong> (<code>bool</code>, defaults to <code>False</code>) &#x2014;
Whether to have FSDP explicitly prefetches the next upcoming all-gather while executing in the forward
pass. only use with Static graphs.`,name:"forward_prefetch"},{anchor:"accelerate.FullyShardedDataParallelPlugin.activation_checkpointing",description:`<strong>activation_checkpointing</strong> (<code>bool</code>, defaults to <code>False</code>) &#x2014;
A technique to reduce memory usage by clearing activations of certain layers and recomputing them during a
backward pass. Effectively, this trades extra computation time for reduced memory usage.`,name:"activation_checkpointing"},{anchor:"accelerate.FullyShardedDataParallelPlugin.cpu_ram_efficient_loading",description:`<strong>cpu_ram_efficient_loading</strong> (<code>bool</code>, defaults to <code>None</code>) &#x2014;
If True, only the first process loads the pretrained model checkoint while all other processes have empty
weights. Only applicable for Transformers. When using this, <code>sync_module_states</code> needs to be <code>True</code>.`,name:"cpu_ram_efficient_loading"},{anchor:"accelerate.FullyShardedDataParallelPlugin.transformer_cls_names_to_wrap",description:`<strong>transformer_cls_names_to_wrap</strong> (<code>Optional[List[str]]</code>, defaults to <code>None</code>) &#x2014;
A list of transformer layer class names to wrap. Only applicable when <code>auto_wrap_policy</code> is
<code>transformer_based_wrap</code>.`,name:"transformer_cls_names_to_wrap"},{anchor:"accelerate.FullyShardedDataParallelPlugin.min_num_params",description:`<strong>min_num_params</strong> (<code>Optional[int]</code>, defaults to <code>None</code>) &#x2014;
The minimum number of parameters a module must have to be wrapped. Only applicable when <code>auto_wrap_policy</code>
is <code>size_based_wrap</code>.`,name:"min_num_params"}],source:"https://github.com/huggingface/accelerate/blob/vr_4021/src/accelerate/utils/dataclasses.py#L1571"}}),q=new v({props:{name:"set_auto_wrap_policy",anchor:"accelerate.FullyShardedDataParallelPlugin.set_auto_wrap_policy",parameters:[{name:"model",val:""}],source:"https://github.com/huggingface/accelerate/blob/vr_4021/src/accelerate/utils/dataclasses.py#L2041"}}),W=new v({props:{name:"set_mixed_precision",anchor:"accelerate.FullyShardedDataParallelPlugin.set_mixed_precision",parameters:[{name:"mixed_precision",val:""},{name:"buffer_autocast",val:" = False"},{name:"override",val:" = False"}],source:"https://github.com/huggingface/accelerate/blob/vr_4021/src/accelerate/utils/dataclasses.py#L2075"}}),B=new v({props:{name:"set_state_dict_type",anchor:"accelerate.FullyShardedDataParallelPlugin.set_state_dict_type",parameters:[{name:"state_dict_type",val:" = None"}],source:"https://github.com/huggingface/accelerate/blob/vr_4021/src/accelerate/utils/dataclasses.py#L1996"}}),j=new v({props:{name:"validate_mixed_precision_policy",anchor:"accelerate.FullyShardedDataParallelPlugin.validate_mixed_precision_policy",parameters:[],source:"https://github.com/huggingface/accelerate/blob/vr_4021/src/accelerate/utils/dataclasses.py#L2127"}}),G=new T({props:{title:"fsdp2_load_full_state_dict",local:"accelerate.utils.fsdp2_load_full_state_dict",headingTag:"h2"}}),K=new v({props:{name:"accelerate.utils.fsdp2_load_full_state_dict",anchor:"accelerate.utils.fsdp2_load_full_state_dict",parameters:[{name:"accelerator",val:""},{name:"model",val:": Module"},{name:"full_sd",val:": dict"},{name:"cpu_offload",val:": bool = False"}],parametersDescription:[{anchor:"accelerate.utils.fsdp2_load_full_state_dict.accelerator",description:"<strong>accelerator</strong> (<code>Accelerator</code>) &#x2014; The accelerator instance",name:"accelerator"},{anchor:"accelerate.utils.fsdp2_load_full_state_dict.model",description:`<strong>model</strong> 
(<code>torch.nn.Module</code>) &#x2014;
The model to load the state dict into, expected to be on meta device or a VRAM spike can occur`,name:"model"},{anchor:"accelerate.utils.fsdp2_load_full_state_dict.full_sd",description:"<strong>full_sd</strong> (<code>dict</code>) &#x2014; The full state dict to load, can only be on rank 0",name:"full_sd"},{anchor:"accelerate.utils.fsdp2_load_full_state_dict.cpu_offload",description:`<strong>cpu_offload</strong> (<code>bool</code>, defaults to <code>False</code>) &#x2014;
If True, move sharded parameters to CPU after distribution. Required when FSDP CPU offloading is enabled.`,name:"cpu_offload"}],source:"https://github.com/huggingface/accelerate/blob/vr_4021/src/accelerate/utils/fsdp_utils.py#L467"}}),J=new T({props:{title:"fsdp2_switch_optimizer_parameters",local:"accelerate.utils.fsdp2_switch_optimizer_parameters",headingTag:"h2"}}),Q=new v({props:{name:"accelerate.utils.fsdp2_switch_optimizer_parameters",anchor:"accelerate.utils.fsdp2_switch_optimizer_parameters",parameters:[{name:"optimizer",val:": Optimizer"},{name:"mapping",val:": dict"}],parametersDescription:[{anchor:"accelerate.utils.fsdp2_switch_optimizer_parameters.optimizer",description:"<strong>optimizer</strong> (<code>torch.optim.Optimizer</code>) &#x2014; Optimizer instance which contains the original model parameters",name:"optimizer"},{anchor:"accelerate.utils.fsdp2_switch_optimizer_parameters.mapping",description:"<strong>mapping</strong> (<code>dict</code>) &#x2014; Mapping from the original parameter (specified by <code>data_ptr</code>) to the sharded parameter",name:"mapping"}],source:"https://github.com/huggingface/accelerate/blob/vr_4021/src/accelerate/utils/fsdp_utils.py#L563",raiseDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<ul>
<li><code>KeyError</code> —
If a parameter in the optimizer couldn’t be switched to its sharded version. This should never happen and
indicates a bug. If we kept the original params instead of raising, the training wouldn’t be numerically
correct and weights wouldn’t get updated.</li>
</ul>
`,raiseType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>KeyError</code></p>
`}}),X=new T({props:{title:"fsdp2_prepare_model",local:"accelerate.utils.fsdp2_prepare_model",headingTag:"h2"}}),Y=new v({props:{name:"accelerate.utils.fsdp2_prepare_model",anchor:"accelerate.utils.fsdp2_prepare_model",parameters:[{name:"accelerator",val:""},{name:"model",val:": Module"}],parametersDescription:[{anchor:"accelerate.utils.fsdp2_prepare_model.accelerator",description:"<strong>accelerator</strong> (<code>Accelerator</code>) &#x2014; The accelerator instance",name:"accelerator"},{anchor:"accelerate.utils.fsdp2_prepare_model.model",description:"<strong>model</strong> (<code>torch.nn.Module</code>) &#x2014; The model to prepare",name:"model"}],source:"https://github.com/huggingface/accelerate/blob/vr_4021/src/accelerate/utils/fsdp_utils.py#L645",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>Prepared model</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>torch.nn.Module</code></p>
`}}),Z=new T({props:{title:"fsdp2_prepare_auto_wrap_policy",local:"fsdp2prepareautowrappolicy",headingTag:"h2"}}),ee=new Tt({props:{source:"https://github.com/huggingface/accelerate/blob/main/docs/source/package_reference/fsdp.md"}}),{c(){w=l("meta"),ge=r(),_e=l("p"),he=r(),s(U.$$.fragment),be=r(),s(O.$$.fragment),ye=r(),s(L.$$.fragment),$e=r(),P=l("div"),s(M.$$.fragment),je=r(),te=l("p"),te.textContent=st,ve=r(),s(I.$$.fragment),we=r(),x=l("div"),s(E.$$.fragment),Ge=r(),ae=l("p"),ae.textContent=ct,Pe=r(),s(R.$$.fragment),xe=r(),$=l("div"),s(A.$$.fragment),Ke=r(),re=l("p"),re.innerHTML=pt,Je=r(),oe=l("p"),oe.textContent=ft,Se=r(),s(H.$$.fragment),De=r(),u=l("div"),s(V.$$.fragment),Qe=r(),ne=l("p"),ne.textContent=mt,Xe=r(),C=l("div"),s(q.$$.fragment),Ye=r(),ie=l("p"),ie.innerHTML=_t,Ze=r(),k=l("div"),s(W.$$.fragment),et=r(),le=l("p"),le.textContent=ut,tt=r(),N=l("div"),s(B.$$.fragment),at=r(),de=l("p"),de.innerHTML=gt,rt=r(),z=l("div"),s(j.$$.fragment),ot=r(),se=l("p"),se.textContent=ht,Fe=r(),s(G.$$.fragment),Te=r(),S=l("div"),s(K.$$.fragment),nt=r(),ce=l("p"),ce.textContent=bt,Ce=r(),s(J.$$.fragment),ke=r(),D=l("div"),s(Q.$$.fragment),it=r(),pe=l("p"),pe.textContent=yt,Ne=r(),s(X.$$.fragment),ze=r(),F=l("div"),s(Y.$$.fragment),lt=r(),fe=l("p"),fe.textContent=$t,Ue=r(),s(Z.$$.fragment),Oe=r(),s(ee.$$.fragment),Le=r(),ue=l("p"),this.h()},l(e){const a=Dt("svelte-u9bgzb",document.head);w=d(a,"META",{name:!0,content:!0}),a.forEach(t),ge=o(e),_e=d(e,"P",{}),g(_e).forEach(t),he=o(e),c(U.$$.fragment,e),be=o(e),c(O.$$.fragment,e),ye=o(e),c(L.$$.fragment,e),$e=o(e),P=d(e,"DIV",{class:!0});var Ie=g(P);c(M.$$.fragment,Ie),je=o(Ie),te=d(Ie,"P",{"data-svelte-h":!0}),y(te)!=="svelte-1lsbcnp"&&(te.textContent=st),Ie.forEach(t),ve=o(e),c(I.$$.fragment,e),we=o(e),x=d(e,"DIV",{class:!0});var Ee=g(x);c(E.$$.fragment,Ee),Ge=o(Ee),ae=d(Ee,"P",{"data-svelte-h":!0}),y(ae)!=="svelte-o9zxg8"&&(ae.textContent=ct),Ee.forEach(t),Pe=o(e),c(R.$$.fragment,e),xe=o(e),$=d(e,"DIV",{class:!0});var me=g($);c(A.$$.fragment,me),Ke=o(me),re=d(me,"P",{"data-svelte-h":!0}),y(re)!=="svelte-lbcivn"&&(re.innerHTML=pt),Je=o(me),oe=d(me,"P",{"data-svelte-h":!0}),y(oe)!=="svelte-s693kt"&&(oe.textContent=ft),me.forEach(t),Se=o(e),c(H.$$.fragment,e),De=o(e),u=d(e,"DIV",{class:!0});var b=g(u);c(V.$$.fragment,b),Qe=o(b),ne=d(b,"P",{"data-svelte-h":!0}),y(ne)!=="svelte-da89af"&&(ne.textContent=mt),Xe=o(b),C=d(b,"DIV",{class:!0});var Re=g(C);c(q.$$.fragment,Re),Ye=o(Re),ie=d(Re,"P",{"data-svelte-h":!0}),y(ie)!=="svelte-1cfoaqn"&&(ie.innerHTML=_t),Re.forEach(t),Ze=o(b),k=d(b,"DIV",{class:!0});var Ae=g(k);c(W.$$.fragment,Ae),et=o(Ae),le=d(Ae,"P",{"data-svelte-h":!0}),y(le)!=="svelte-1oofbyv"&&(le.textContent=ut),Ae.forEach(t),tt=o(b),N=d(b,"DIV",{class:!0});var He=g(N);c(B.$$.fragment,He),at=o(He),de=d(He,"P",{"data-svelte-h":!0}),y(de)!=="svelte-1ugzx3"&&(de.innerHTML=gt),He.forEach(t),rt=o(b),z=d(b,"DIV",{class:!0});var Ve=g(z);c(j.$$.fragment,Ve),ot=o(Ve),se=d(Ve,"P",{"data-svelte-h":!0}),y(se)!=="svelte-w1jkhd"&&(se.textContent=ht),Ve.forEach(t),b.forEach(t),Fe=o(e),c(G.$$.fragment,e),Te=o(e),S=d(e,"DIV",{class:!0});var qe=g(S);c(K.$$.fragment,qe),nt=o(qe),ce=d(qe,"P",{"data-svelte-h":!0}),y(ce)!=="svelte-1kruoq8"&&(ce.textContent=bt),qe.forEach(t),Ce=o(e),c(J.$$.fragment,e),ke=o(e),D=d(e,"DIV",{class:!0});var We=g(D);c(Q.$$.fragment,We),it=o(We),pe=d(We,"P",{"data-svelte-h":!0}),y(pe)!=="svelte-cvynzy"&&(pe.textContent=yt),We.forEach(t),Ne=o(e),c(X.$$.fragment,e),ze=o(e),F=d(e,"DIV",{class:!0});var Be=g(F);c(Y.$$.fragment,Be),lt=o(Be),fe=d(Be,"P",{"data-svelte-h":!0}),y(fe)!=="svelte-pwh3xf"&&(fe.textContent=$t),Be.forEach(t),Ue=o(e),c(Z.$$.fragment,e),Oe=o(e),c(ee.$$.fragment,e),Le=o(e),ue=d(e,"P",{}),g(ue).forEach(t),this.h()},h(){h(w,"name","hf:doc:metadata"),h(w,"content",kt),h(P,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),h(x,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),h($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),h(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),h(k,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),h(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),h(z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),h(u,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),h(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),h(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),h(F,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,a){n(document.head,w),i(e,ge,a),i(e,_e,a),i(e,he,a),p(U,e,a),i(e,be,a),p(O,e,a),i(e,ye,a),p(L,e,a),i(e,$e,a),i(e,P,a),p(M,P,null),n(P,je),n(P,te),i(e,ve,a),p(I,e,a),i(e,we,a),i(e,x,a),p(E,x,null),n(x,Ge),n(x,ae),i(e,Pe,a),p(R,e,a),i(e,xe,a),i(e,$,a),p(A,$,null),n($,Ke),n($,re),n($,Je),n($,oe),i(e,Se,a),p(H,e,a),i(e,De,a),i(e,u,a),p(V,u,null),n(u,Qe),n(u,ne),n(u,Xe),n(u,C),p(q,C,null),n(C,Ye),n(C,ie),n(u,Ze),n(u,k),p(W,k,null),n(k,et),n(k,le),n(u,tt),n(u,N),p(B,N,null),n(N,at),n(N,de),n(u,rt),n(u,z),p(j,z,null),n(z,ot),n(z,se),i(e,Fe,a),p(G,e,a),i(e,Te,a),i(e,S,a),p(K,S,null),n(S,nt),n(S,ce),i(e,Ce,a),p(J,e,a),i(e,ke,a),i(e,D,a),p(Q,D,null),n(D,it),n(D,pe),i(e,Ne,a),p(X,e,a),i(e,ze,a),i(e,F,a),p(Y,F,null),n(F,lt),n(F,fe),i(e,Ue,a),p(Z,e,a),i(e,Oe,a),p(ee,e,a),i(e,Le,a),i(e,ue,a),Me=!0},p:wt,i(e){Me||(f(U.$$.fragment,e),f(O.$$.fragment,e),f(L.$$.fragment,e),f(M.$$.fragment,e),f(I.$$.fragment,e),f(E.$$.fragment,e),f(R.$$.fragment,e),f(A.$$.fragment,e),f(H.$$.fragment,e),f(V.$$.fragment,e),f(q.$$.fragment,e),f(W.$$.fragment,e),f(B.$$.fragment,e),f(j.$$.fragment,e),f(G.$$.fragment,e),f(K.$$.fragment,e),f(J.$$.fragment,e),f(Q.$$.fragment,e),f(X.$$.fragment,e),f(Y.$$.fragment,e),f(Z.$$.fragment,e),f(ee.$$.fragment,e),Me=!0)},o(e){m(U.$$.fragment,e),m(O.$$.fragment,e),m(L.$$.fragment,e),m(M.$$.fragment,e),m(I.$$.fragment,e),m(E.$$.fragment,e),m(R.$$.fragment,e),m(A.$$.fragment,e),m(H.$$.fragment,e),m(V.$$.fragment,e),m(q.$$.fragment,e),m(W.$$.fragment,e),m(B.$$.fragment,e),m(j.$$.fragment,e),m(G.$$.fragment,e),m(K.$$.fragment,e),m(J.$$.fragment,e),m(Q.$$.fragment,e),m(X.$$.fragment,e),m(Y.$$.fragment,e),m(Z.$$.fragment,e),m(ee.$$.fragment,e),Me=!1},d(e){e&&(t(ge),t(_e),t(he),t(be),t(ye),t($e),t(P),t(ve),t(we),t(x),t(Pe),t(xe),t($),t(Se),t(De),t(u),t(Fe),t(Te),t(S),t(Ce),t(ke),t(D),t(Ne),t(ze),t(F),t(Ue),t(Oe),t(Le),t(ue)),t(w),_(U,e),_(O,e),_(L,e),_(M),_(I,e),_(E),_(R,e),_(A),_(H,e),_(V),_(q),_(W),_(B),_(j),_(G,e),_(K),_(J,e),_(Q),_(X,e),_(Y),_(Z,e),_(ee,e)}}}
/**
 * Page metadata as a JSON string: title, anchor (`local`), nesting depth and the list of
 * second-level sections. `Ct` writes it into the `hf:doc:metadata` meta tag's `content`.
 */
const kt='{"title":"Fully Sharded Data Parallel utilities","local":"fully-sharded-data-parallel-utilities","sections":[{"title":"enable_fsdp_ram_efficient_loading","local":"accelerate.utils.enable_fsdp_ram_efficient_loading","sections":[],"depth":2},{"title":"disable_fsdp_ram_efficient_loading","local":"accelerate.utils.disable_fsdp_ram_efficient_loading","sections":[],"depth":2},{"title":"merge_fsdp_weights","local":"accelerate.utils.merge_fsdp_weights","sections":[],"depth":2},{"title":"FullyShardedDataParallelPlugin","local":"accelerate.FullyShardedDataParallelPlugin","sections":[],"depth":2},{"title":"fsdp2_load_full_state_dict","local":"accelerate.utils.fsdp2_load_full_state_dict","sections":[],"depth":2},{"title":"fsdp2_switch_optimizer_parameters","local":"accelerate.utils.fsdp2_switch_optimizer_parameters","sections":[],"depth":2},{"title":"fsdp2_prepare_model","local":"accelerate.utils.fsdp2_prepare_model","sections":[],"depth":2},{"title":"fsdp2_prepare_auto_wrap_policy","local":"fsdp2prepareautowrappolicy","sections":[],"depth":2}],"depth":1}';
/**
 * Instance function handed to `St` by the `Mt` constructor.
 *
 * Registers a callback through the imported `Pt`; the callback reads the `fw` query parameter
 * from `window.location.search` and discards the value, so it has no visible effect here.
 * NOTE(review): `Pt` is presumably Svelte's `onMount` - confirm in ../chunks/scheduler.
 *
 * @param {*} dt - Unused.
 * @returns {Array} Always an empty array.
 */
function Nt(dt){return Pt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/**
 * Page component class. The constructor forwards its options object `w` to the imported
 * initializer `St` together with `Nt`, `Ct`, the imported `vt` and an empty props map.
 */
class Mt extends xt{constructor(w){super(),St(this,w,Nt,Ct,vt,{})}}
export{Mt as component};

Xet Storage Details

Size:
29.7 kB
·
Xet hash:
7189ab4cb9f004a81835f51ab27265d70a7f93913ed4736d14459fd3c7aaeba1

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.