Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content='{"title":"DeepSpeed","local":"deepspeed","sections":[{"title":"What is integrated?","local":"what-is-integrated","sections":[],"depth":2},{"title":"How it works?","local":"how-it-works","sections":[{"title":"Accelerate DeepSpeed Plugin","local":"accelerate-deepspeed-plugin","sections":[],"depth":3},{"title":"DeepSpeed Config File","local":"deepspeed-config-file","sections":[],"depth":3}],"depth":2},{"title":"Saving and loading","local":"saving-and-loading","sections":[],"depth":2},{"title":"ZeRO Inference","local":"zero-inference","sections":[],"depth":2},{"title":"Few caveats to be aware of","local":"few-caveats-to-be-aware-of","sections":[],"depth":2},{"title":"Multi-node DeepSpeed","local":"multi-node-deepspeed","sections":[],"depth":2},{"title":"DeepSpeed Resources","local":"deepspeed-resources","sections":[],"depth":2}],"depth":1}'> | |
| <link rel="preload" as="style" href="/docs/accelerate/pr_4021/en/_app/immutable/assets/0.e3b0c442.css"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/entry/start.8a49e72b.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/scheduler.b9285784.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/singletons.7547c222.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/index.6d423e5c.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/paths.d42c9205.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/entry/app.1df4d18e.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/preload-helper.b0bd19d1.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/index.26bc89a1.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/nodes/0.0e7c56e8.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/nodes/42.bf269326.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/Tip.e4eba3d6.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.7a0ae628.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/CodeBlock.844ff9c3.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"DeepSpeed","local":"deepspeed","sections":[{"title":"What is integrated?","local":"what-is-integrated","sections":[],"depth":2},{"title":"How it works?","local":"how-it-works","sections":[{"title":"Accelerate DeepSpeed Plugin","local":"accelerate-deepspeed-plugin","sections":[],"depth":3},{"title":"DeepSpeed Config File","local":"deepspeed-config-file","sections":[],"depth":3}],"depth":2},{"title":"Saving and loading","local":"saving-and-loading","sections":[],"depth":2},{"title":"ZeRO Inference","local":"zero-inference","sections":[],"depth":2},{"title":"Few caveats to be aware of","local":"few-caveats-to-be-aware-of","sections":[],"depth":2},{"title":"Multi-node DeepSpeed","local":"multi-node-deepspeed","sections":[],"depth":2},{"title":"DeepSpeed Resources","local":"deepspeed-resources","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="deepspeed" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deepspeed"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>DeepSpeed</span></h1> <p data-svelte-h="svelte-d7jnly"><a href="https://github.com/deepspeedai/DeepSpeed" rel="nofollow">DeepSpeed</a> implements everything described in the <a href="https://huggingface.co/papers/1910.02054" rel="nofollow">ZeRO paper</a>. Some of the salient optimizations are:</p> <ol data-svelte-h="svelte-3z90ra"><li>Optimizer state partitioning (ZeRO stage 1)</li> <li>Gradient partitioning (ZeRO stage 2)</li> <li>Parameter partitioning (ZeRO stage 3)</li> <li>Custom mixed precision training handling</li> <li>A range of fast CUDA-extension-based optimizers</li> <li>ZeRO-Offload to CPU and Disk/NVMe</li> <li>Hierarchical partitioning of model parameters (ZeRO++)</li></ol> <p data-svelte-h="svelte-18056kh">ZeRO-Offload has its own dedicated paper: <a href="https://huggingface.co/papers/2101.06840" rel="nofollow">ZeRO-Offload: Democratizing Billion-Scale Model Training</a>. And NVMe-support is described in the paper <a href="https://huggingface.co/papers/2104.07857" rel="nofollow">ZeRO-Infinity: Breaking the GPU | |
| Memory Wall for Extreme Scale Deep Learning</a>.</p> <p data-svelte-h="svelte-iaccrf">DeepSpeed ZeRO-2 is primarily used only for training, as its features are of no use to inference.</p> <p data-svelte-h="svelte-l1xxpu">DeepSpeed ZeRO-3 can be used for inference as well since it allows huge models to be loaded on multiple GPUs, which | |
| wouldn’t be possible on a single GPU.</p> <p data-svelte-h="svelte-a6ic94">Accelerate integrates <a href="https://github.com/deepspeedai/DeepSpeed" rel="nofollow">DeepSpeed</a> via 2 options:</p> <ol data-svelte-h="svelte-15nzigp"><li>Integration of the DeepSpeed features via <code>deepspeed config file</code> specification in <code>accelerate config</code>. You just supply your custom config file or use our template. Most of | |
| this document is focused on this feature. This supports all the core features of DeepSpeed and gives the user a lot of flexibility. | |
| The user may have to change a few lines of code depending on the config.</li> <li>Integration via <code>deepspeed_plugin</code>. This supports a subset of the DeepSpeed features and uses default options for the rest of the configurations. | |
| User need not change any code and is good for those who are fine with most of the default settings of DeepSpeed.</li></ol> <h2 class="relative group"><a id="what-is-integrated" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-is-integrated"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What is integrated?</span></h2> <p data-svelte-h="svelte-1igpel8">Training:</p> <ol data-svelte-h="svelte-1dxmiag"><li>Accelerate integrates all features of DeepSpeed ZeRO. This includes all the ZeRO stages 1, 2 and 3 as well as ZeRO-Offload, ZeRO-Infinity (which can offload to disk/NVMe) and ZeRO++. | |
| Below is a short description of Data Parallelism using ZeRO - Zero Redundancy Optimizer along with diagram from this <a href="https://www.microsoft.com/en-us/research/blog/zero-deepspeed-new-system-optimizations-enable-training-models-with-over-100-billion-parameters/" rel="nofollow">blog post</a> <img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/parallelism-zero.png" alt="ZeRO Data Parallelism"></li></ol> <p data-svelte-h="svelte-134sd43">(Source: <a href="https://www.microsoft.com/en-us/research/blog/zero-deepspeed-new-system-optimizations-enable-training-models-with-over-100-billion-parameters/" rel="nofollow">link</a>)</p> <p data-svelte-h="svelte-1d4qqra">a. <strong>Stage 1</strong> : Shards optimizer states across data parallel workers/GPUs</p> <p data-svelte-h="svelte-n0e1lo">b. <strong>Stage 2</strong> : Shards optimizer states + gradients across data parallel workers/GPUs</p> <p data-svelte-h="svelte-1ob4qwq">c. <strong>Stage 3</strong>: Shards optimizer states + gradients + model parameters across data parallel workers/GPUs</p> <p data-svelte-h="svelte-f1y4d9">d. <strong>Optimizer Offload</strong>: Offloads the gradients + optimizer states to CPU/Disk building on top of ZERO Stage 2</p> <p data-svelte-h="svelte-1sgbcs5">e. <strong>Param Offload</strong>: Offloads the model parameters to CPU/Disk building on top of ZERO Stage 3</p> <p data-svelte-h="svelte-yiw0au">f. <strong>Hierarchical Partitioning</strong>: Enables efficient multi-node training with data-parallel training across nodes and ZeRO-3 sharding within a node, built on top of ZeRO Stage 3.</p> <u data-svelte-h="svelte-248d1k">Note</u>: With respect to Disk Offload, the disk should be an NVME for decent speed but it technically works on any Disk | |
| <p data-svelte-h="svelte-1i9b1jx">Inference:</p> <ol data-svelte-h="svelte-yjfbl7"><li>DeepSpeed ZeRO Inference supports ZeRO stage 3 with ZeRO-Infinity. It uses the same ZeRO protocol as training, but | |
| it doesn’t use an optimizer or an LR scheduler, and only stage 3 is relevant. For more details see: | |
| <a href="#zero-inference">ZeRO Inference</a>.</li></ol> <h2 class="relative group"><a id="how-it-works" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#how-it-works"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>How it works?</span></h2> <p data-svelte-h="svelte-1k0k3hd"><strong>Pre-Requisites</strong>: Install DeepSpeed version >=0.6.5. Please refer to the <a href="https://github.com/deepspeedai/DeepSpeed#installation" rel="nofollow">DeepSpeed Installation details</a> | |
| for more information.</p> <p data-svelte-h="svelte-yplg2o">We will first look at the easy-to-use integration via <code>accelerate config</code>. | |
| Followed by more flexible and feature rich <code>deepspeed config file</code> integration.</p> <h3 class="relative group"><a id="accelerate-deepspeed-plugin" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#accelerate-deepspeed-plugin"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Accelerate DeepSpeed Plugin</span></h3> <p data-svelte-h="svelte-3pof6s">On your machine(s) just run:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> 
<div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate config<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ik26hy">and answer the questions asked. It will ask whether you want to use a config file for DeepSpeed to which you should answer no. Then answer the following questions to generate a basic DeepSpeed config. | |
| This will generate a config file that will be used automatically to properly set the | |
| default options when doing</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch my_script.py --args_to_my_script<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-10u1bf7">For instance, here is how you would run the NLP example <code>examples/nlp_example.py</code> (from the root of the repo) with DeepSpeed Plugin:</p> <p data-svelte-h="svelte-vp1nym"><strong>ZeRO Stage-2 DeepSpeed Plugin Example</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" 
role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->compute_environment: LOCAL_MACHINE | |
| deepspeed_config: | |
|   gradient_accumulation_steps: 1 | |
|   gradient_clipping: 1.0 | |
|   offload_optimizer_device: none | |
|   offload_param_device: none | |
|   zero3_init_flag: <span class="hljs-literal">true</span> | |
|   zero_stage: 2 | |
| distributed_type: DEEPSPEED | |
| fsdp_config: {} | |
| machine_rank: 0 | |
| main_process_ip: null | |
| main_process_port: null | |
| main_training_function: main | |
| mixed_precision: fp16 | |
| num_machines: 1 | |
| num_processes: 2 | |
| use_cpu: <span class="hljs-literal">false</span><!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch examples/nlp_example.py --mixed_precision fp16<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12x21g8"><strong>ZeRO Stage-3 with CPU Offload DeepSpeed Plugin Example</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->compute_environment: LOCAL_MACHINE | |
| deepspeed_config: | |
|   gradient_accumulation_steps: 1 | |
|   gradient_clipping: 1.0 | |
|   offload_optimizer_device: cpu | |
|   offload_param_device: cpu | |
|   zero3_init_flag: <span class="hljs-literal">true</span> | |
|   zero3_save_16bit_model: <span class="hljs-literal">true</span> | |
|   zero_stage: 3 | |
| distributed_type: DEEPSPEED | |
| fsdp_config: {} | |
| machine_rank: 0 | |
| main_process_ip: null | |
| main_process_port: null | |
| main_training_function: main | |
| mixed_precision: fp16 | |
| num_machines: 1 | |
| num_processes: 2 | |
| use_cpu: <span class="hljs-literal">false</span><!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch examples/nlp_example.py --mixed_precision fp16<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-lh95an">Currently, <code>Accelerate</code> supports following config through the CLI:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->`zero_stage`: [0] Disabled, [1] optimizer state partitioning, [2] optimizer+gradient state partitioning and [3] optimizer+gradient+parameter partitioning | |
| `gradient_accumulation_steps`: Number of training steps to accumulate gradients before averaging and applying them. | |
| `gradient_clipping`: Enable gradient clipping with value. | |
| `offload_optimizer_device`: [none] Disable optimizer offloading, [cpu] offload optimizer to CPU, [nvme] offload optimizer to NVMe SSD. Only applicable with ZeRO >= Stage-2. | |
| `offload_optimizer_nvme_path`: Decides NVMe path to offload optimizer states. If unspecified, will default to <span class="hljs-string">'none'</span>. | |
| `offload_param_device`: [none] Disable parameter offloading, [cpu] offload parameters to CPU, [nvme] offload parameters to NVMe SSD. Only applicable with ZeRO Stage-3. | |
| `offload_param_nvme_path`: Decides NVMe path to offload parameters. If unspecified, will default to <span class="hljs-string">'none'</span>. | |
| `zero3_init_flag`: Decides whether to <span class="hljs-built_in">enable</span> `deepspeed.zero.Init` <span class="hljs-keyword">for</span> constructing massive models. Only applicable with ZeRO Stage-3. | |
| `zero3_save_16bit_model`: Decides whether to save 16-bit model weights when using ZeRO Stage-3. | |
| `mixed_precision`: `no` <span class="hljs-keyword">for</span> FP32 training, `fp16` <span class="hljs-keyword">for</span> FP16 mixed-precision training and `bf16` <span class="hljs-keyword">for</span> BF16 mixed-precision training. | |
| `deepspeed_moe_layer_cls_names`: Comma-separated list of transformer Mixture-of-Experts (MoE) layer class names (case-sensitive) to wrap, e.g., `MixtralSparseMoeBlock`, `Qwen2MoeSparseMoeBlock`, `JetMoEAttention,JetMoEBlock` ... | |
| `deepspeed_hostfile`: DeepSpeed hostfile <span class="hljs-keyword">for</span> configuring multi-node compute resources. | |
| `deepspeed_exclusion_filter`: DeepSpeed exclusion filter string when using multi-node setup. | |
| `deepspeed_inclusion_filter`: DeepSpeed inclusion filter string when using multi-node setup. | |
| `deepspeed_multinode_launcher`: DeepSpeed multi-node launcher to use, e.g. `pdsh`, `standard`, `openmpi`, `mvapich`, `mpich`, `slurm`, `nossh` (requires DeepSpeed >= 0.14.5). If unspecified, will default to `pdsh`. | |
| `deepspeed_config_file`: path to the DeepSpeed config file <span class="hljs-keyword">in</span> `json` format. See the next section <span class="hljs-keyword">for</span> more details on this.<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1spmm13">To be able to tweak more options, you will need to use a DeepSpeed config file.</p> <h3 class="relative group"><a id="deepspeed-config-file" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deepspeed-config-file"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>DeepSpeed Config File</span></h3> <p data-svelte-h="svelte-3pof6s">On your machine(s) just run:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate config<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1vp75xs">and answer the questions asked. It will ask whether you want to use a config file for deepspeed to which you answer yes | |
| and provide the path to the deepspeed config file. | |
| This will generate a config file that will be used automatically to properly set the | |
| default options when doing</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch my_script.py --args_to_my_script<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-17th5u9">For instance, here is how you would run the NLP example <code>examples/by_feature/deepspeed_with_config_support.py</code> (from the root of the repo) with DeepSpeed Config File:</p> <p data-svelte-h="svelte-1l59g75"><strong>ZeRO Stage-2 DeepSpeed Config File Example</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" 
fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->compute_environment: LOCAL_MACHINE | |
| deepspeed_config: | |
| deepspeed_config_file: /home/ubuntu/accelerate/examples/deepspeed_config_templates/zero_stage2_config.json | |
| zero3_init_flag: <span class="hljs-literal">true</span> | |
| distributed_type: DEEPSPEED | |
| fsdp_config: {} | |
| machine_rank: 0 | |
| main_process_ip: null | |
| main_process_port: null | |
| main_training_function: main | |
| mixed_precision: fp16 | |
| num_machines: 1 | |
| num_processes: 2 | |
| use_cpu: <span class="hljs-literal">false</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1iv8u1c">with the contents of <code>zero_stage2_config.json</code> being:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"fp16"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"enabled"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"loss_scale"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"loss_scale_window"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1000</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"initial_scale_power"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">16</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"hysteresis"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">2</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"min_loss_scale"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"optimizer"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"AdamW"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"params"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"lr"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"weight_decay"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"torch_adam"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"adam_w_mode"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"scheduler"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"WarmupDecayLR"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"params"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"warmup_min_lr"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"warmup_max_lr"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"warmup_num_steps"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"total_num_steps"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"zero_optimization"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"stage"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">2</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"allgather_partitions"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"allgather_bucket_size"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">2e8</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"overlap_comm"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"reduce_scatter"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"reduce_bucket_size"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"contiguous_gradients"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"gradient_accumulation_steps"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"gradient_clipping"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"steps_per_print"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">2000</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"train_batch_size"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"train_micro_batch_size_per_gpu"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"wall_clock_breakdown"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">false</span></span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch examples/by_feature/deepspeed_with_config_support.py \ | |
| --config_name <span class="hljs-string">"gpt2-large"</span> \ | |
| --tokenizer_name <span class="hljs-string">"gpt2-large"</span> \ | |
| --dataset_name <span class="hljs-string">"wikitext"</span> \ | |
| --dataset_config_name <span class="hljs-string">"wikitext-2-raw-v1"</span> \ | |
| --block_size 128 \ | |
| --output_dir <span class="hljs-string">"./clm/clm_deepspeed_stage2_accelerate"</span> \ | |
| --learning_rate 5e-4 \ | |
| --per_device_train_batch_size 24 \ | |
| --per_device_eval_batch_size 24 \ | |
| --num_train_epochs 3 \ | |
| --with_tracking \ | |
| --report_to <span class="hljs-string">"wandb"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-3yuze5"><strong>ZeRO Stage-3 with CPU offload DeepSpeed Config File Example</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->compute_environment: LOCAL_MACHINE | |
| deepspeed_config: | |
| deepspeed_config_file: /home/ubuntu/accelerate/examples/deepspeed_config_templates/zero_stage3_offload_config.json | |
| zero3_init_flag: <span class="hljs-literal">true</span> | |
| distributed_type: DEEPSPEED | |
| fsdp_config: {} | |
| machine_rank: 0 | |
| main_process_ip: null | |
| main_process_port: null | |
| main_training_function: main | |
| mixed_precision: fp16 | |
| num_machines: 1 | |
| num_processes: 2 | |
| use_cpu: <span class="hljs-literal">false</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-15nfwtv">with the contents of <code>zero_stage3_offload_config.json</code> being:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"fp16"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"enabled"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"loss_scale"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">0</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"loss_scale_window"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1000</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"initial_scale_power"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">16</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"hysteresis"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">2</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"min_loss_scale"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"optimizer"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"AdamW"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"params"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"lr"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"weight_decay"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"scheduler"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"type"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"WarmupDecayLR"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"params"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"warmup_min_lr"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"warmup_max_lr"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"warmup_num_steps"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"total_num_steps"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"zero_optimization"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"stage"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">3</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"offload_optimizer"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"device"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"cpu"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"pin_memory"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"offload_param"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"device"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"cpu"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"pin_memory"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"overlap_comm"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"contiguous_gradients"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"reduce_bucket_size"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"stage3_prefetch_bucket_size"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"stage3_param_persistence_threshold"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"sub_group_size"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1e9</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"stage3_max_live_parameters"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1e9</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"stage3_max_reuse_distance"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1e9</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"stage3_gather_16bit_weights_on_model_save"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"gradient_accumulation_steps"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"gradient_clipping"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"steps_per_print"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">2000</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"train_batch_size"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"train_micro_batch_size_per_gpu"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"wall_clock_breakdown"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">false</span></span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->accelerate launch examples/by_feature/deepspeed_with_config_support.py \ | |
| --config_name <span class="hljs-string">"gpt2-large"</span> \ | |
| --tokenizer_name <span class="hljs-string">"gpt2-large"</span> \ | |
| --dataset_name <span class="hljs-string">"wikitext"</span> \ | |
| --dataset_config_name <span class="hljs-string">"wikitext-2-raw-v1"</span> \ | |
| --block_size 128 \ | |
| --output_dir <span class="hljs-string">"./clm/clm_deepspeed_stage3_offload_accelerate"</span> \ | |
| --learning_rate 5e-4 \ | |
| --per_device_train_batch_size 32 \ | |
| --per_device_eval_batch_size 32 \ | |
| --num_train_epochs 3 \ | |
| --with_tracking \ | |
| --report_to <span class="hljs-string">"wandb"</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1cyo8oc"><strong>ZeRO++ Config Example</strong> | |
| You can use the features of ZeRO++ by using the appropriate config parameters. Note that ZeRO++ is an extension for ZeRO Stage 3. Here is how the config file can be modified, from <a href="https://www.deepspeed.ai/tutorials/zeropp/" rel="nofollow">DeepSpeed’s ZeRO++ tutorial</a>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"zero_optimization"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"stage"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">3</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"reduce_bucket_size"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"zero_quantized_weights"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"zero_hpz_partition_size"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">8</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"zero_quantized_gradients"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"contiguous_gradients"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"overlap_comm"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-14gqfge">For hierarchical partitioning, the partition size <code>zero_hpz_partition_size</code> should ideally be set to the number of GPUs per node. (For example, the above config file assumes 8 GPUs per node)</p> <p data-svelte-h="svelte-1y80l2"><strong>Important code changes when using DeepSpeed Config File</strong></p> <ol><li><p data-svelte-h="svelte-19wri7m">DeepSpeed Optimizers and Schedulers. For more information on these, | |
| see the <a href="https://deepspeed.readthedocs.io/en/latest/optimizers.html" rel="nofollow">DeepSpeed Optimizers</a> and <a href="https://deepspeed.readthedocs.io/en/latest/schedulers.html" rel="nofollow">DeepSpeed Schedulers</a> documentation. | |
| We will look at the changes needed in the code when using these.</p> <p data-svelte-h="svelte-1w1lp0n">a. DS Optim + DS Scheduler: The case when both <code>optimizer</code> and <code>scheduler</code> keys are present in the DeepSpeed config file. | |
| In this situation, the optimizer and scheduler defined in the config file will be used, and the user has to use <code>accelerate.utils.DummyOptim</code> and <code>accelerate.utils.DummyScheduler</code> to replace the PyTorch/Custom optimizers and schedulers in their code. | |
| Below is the snippet from <code>examples/by_feature/deepspeed_with_config_support.py</code> showing this:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> <span class="hljs-comment"># Creates Dummy Optimizer if `optimizer` was specified in the config file else creates Adam Optimizer</span> | |
| optimizer_cls = ( | |
| torch.optim.AdamW | |
| <span class="hljs-keyword">if</span> accelerator.state.deepspeed_plugin <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span> | |
| <span class="hljs-keyword">or</span> <span class="hljs-string">"optimizer"</span> <span class="hljs-keyword">not</span> <span class="hljs-keyword">in</span> accelerator.state.deepspeed_plugin.deepspeed_config | |
| <span class="hljs-keyword">else</span> DummyOptim | |
| ) | |
| optimizer = optimizer_cls(optimizer_grouped_parameters, lr=args.learning_rate) | |
| <span class="hljs-comment"># Creates Dummy Scheduler if `scheduler` was specified in the config file else creates `args.lr_scheduler_type` Scheduler</span> | |
| <span class="hljs-keyword">if</span> ( | |
| accelerator.state.deepspeed_plugin <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span> | |
| <span class="hljs-keyword">or</span> <span class="hljs-string">"scheduler"</span> <span class="hljs-keyword">not</span> <span class="hljs-keyword">in</span> accelerator.state.deepspeed_plugin.deepspeed_config | |
| ): | |
| lr_scheduler = get_scheduler( | |
| name=args.lr_scheduler_type, | |
| optimizer=optimizer, | |
| num_warmup_steps=args.num_warmup_steps, | |
| num_training_steps=args.max_train_steps, | |
| ) | |
| <span class="hljs-keyword">else</span>: | |
| lr_scheduler = DummyScheduler( | |
| optimizer, total_num_steps=args.max_train_steps, warmup_num_steps=args.num_warmup_steps | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ce0g29">b. Custom Optim + Custom Scheduler: The case when both <code>optimizer</code> and <code>scheduler</code> keys are absent in the DeepSpeed config file. | |
| In this situation, no code changes are needed from the user; this is the case that applies when using the integration via the DeepSpeed Plugin. | |
| In the above example we can see that the code remains unchanged if the <code>optimizer</code> and <code>scheduler</code> keys are absent in the DeepSpeed config file.</p> <p data-svelte-h="svelte-fgwfk1">c. Custom Optim + DS Scheduler: The case when only <code>scheduler</code> key is present in the DeepSpeed config file. | |
| In this situation, the user has to use <code>accelerate.utils.DummyScheduler</code> to replace the PyTorch/Custom scheduler in their code.</p> <p data-svelte-h="svelte-jof0o2">d. DS Optim + Custom Scheduler: The case when only <code>optimizer</code> key is present in the DeepSpeed config file. | |
| This will result in an error because, when using DS Optim, you can only use DS Scheduler.</p></li> <li data-svelte-h="svelte-1k2nlfe"><p>Notice the <code>auto</code> values in the above example DeepSpeed config files. These are automatically handled by the <code>prepare</code> method | |
| based on the model, dataloaders, dummy optimizer and dummy schedulers provided to the <code>prepare</code> method. | |
| Only the <code>auto</code> fields specified in above examples are handled by <code>prepare</code> method and the rest have to be explicitly specified by the user.</p></li></ol> <p data-svelte-h="svelte-1nruzcp">The <code>auto</code> values are calculated as:</p> <ul data-svelte-h="svelte-12xd0n5"><li><code>reduce_bucket_size</code>: <code>hidden_size * hidden_size</code></li> <li><code>stage3_prefetch_bucket_size</code>: <code>int(0.9 * hidden_size * hidden_size)</code></li> <li><code>stage3_param_persistence_threshold</code>: <code>10 * hidden_size</code></li></ul> <p data-svelte-h="svelte-1d6bbd1">For the <code>auto</code> feature to work for these 3 config entries - Accelerate will use <code>model.config.hidden_size</code> or <code>max(model.config.hidden_sizes)</code> as <code>hidden_size</code>. If neither of these is available, the launching will fail and you will have to set these 3 config entries manually. Remember the first 2 config entries are the communication buffers - the larger they are the more efficient the comms will be, and the larger they are the more GPU memory they will consume, so it’s a tunable performance trade-off.</p> <p data-svelte-h="svelte-nnfxti"><strong>Things to note when using DeepSpeed Config File</strong></p> <p data-svelte-h="svelte-2eh81g">Below is a sample script using <code>deepspeed_config_file</code> in different scenarios.</p> <p data-svelte-h="svelte-1r2ikm1">Code <code>test.py</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator | |
| <span class="hljs-keyword">from</span> accelerate.state <span class="hljs-keyword">import</span> AcceleratorState | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">main</span>(): | |
| accelerator = Accelerator() | |
| accelerator.<span class="hljs-built_in">print</span>(<span class="hljs-string">f"<span class="hljs-subst">{AcceleratorState()}</span>"</span>) | |
| <span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">"__main__"</span>: | |
| main()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-23a28o"><strong>Scenario 1</strong>: Manually tampered accelerate config file having <code>deepspeed_config_file</code> along with other entries.</p> <ol data-svelte-h="svelte-835v9i"><li>Content of the <code>accelerate</code> config:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-attr">command_file:</span> <span class="hljs-literal">null</span> | |
| <span class="hljs-attr">commands:</span> <span class="hljs-literal">null</span> | |
| <span class="hljs-attr">compute_environment:</span> <span class="hljs-string">LOCAL_MACHINE</span> | |
| <span class="hljs-attr">deepspeed_config:</span> | |
| <span class="hljs-attr">gradient_accumulation_steps:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">gradient_clipping:</span> <span class="hljs-number">1.0</span> | |
| <span class="hljs-attr">offload_optimizer_device:</span> <span class="hljs-string">'cpu'</span> | |
| <span class="hljs-attr">offload_param_device:</span> <span class="hljs-string">'cpu'</span> | |
| <span class="hljs-attr">zero3_init_flag:</span> <span class="hljs-literal">true</span> | |
| <span class="hljs-attr">zero3_save_16bit_model:</span> <span class="hljs-literal">true</span> | |
| <span class="hljs-attr">zero_stage:</span> <span class="hljs-number">3</span> | |
| <span class="hljs-attr">deepspeed_config_file:</span> <span class="hljs-string">'ds_config.json'</span> | |
| <span class="hljs-attr">distributed_type:</span> <span class="hljs-string">DEEPSPEED</span> | |
| <span class="hljs-attr">downcast_bf16:</span> <span class="hljs-string">'no'</span> | |
| <span class="hljs-attr">dynamo_backend:</span> <span class="hljs-string">'NO'</span> | |
| <span class="hljs-attr">fsdp_config:</span> {} | |
| <span class="hljs-attr">gpu_ids:</span> <span class="hljs-literal">null</span> | |
| <span class="hljs-attr">machine_rank:</span> <span class="hljs-number">0</span> | |
| <span class="hljs-attr">main_process_ip:</span> <span class="hljs-literal">null</span> | |
| <span class="hljs-attr">main_process_port:</span> <span class="hljs-literal">null</span> | |
| <span class="hljs-attr">main_training_function:</span> <span class="hljs-string">main</span> | |
| <span class="hljs-attr">megatron_lm_config:</span> {} | |
| <span class="hljs-attr">num_machines:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">num_processes:</span> <span class="hljs-number">2</span> | |
| <span class="hljs-attr">rdzv_backend:</span> <span class="hljs-string">static</span> | |
| <span class="hljs-attr">same_network:</span> <span class="hljs-literal">true</span> | |
| <span class="hljs-attr">tpu_name:</span> <span class="hljs-literal">null</span> | |
| <span class="hljs-attr">tpu_zone:</span> <span class="hljs-literal">null</span> | |
| <span class="hljs-attr">use_cpu:</span> <span class="hljs-literal">false</span><!-- HTML_TAG_END --></pre></div> <ol start="2" data-svelte-h="svelte-1f7op2x"><li><code>ds_config.json</code>:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"bf16"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"enabled"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">true</span></span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"zero_optimization"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"stage"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">3</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"stage3_gather_16bit_weights_on_model_save"</span><span class="hljs-punctuation">:</span> <span class="hljs-literal"><span class="hljs-keyword">false</span></span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"offload_optimizer"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"device"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"none"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"offload_param"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"device"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"none"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"gradient_clipping"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">1.0</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"train_batch_size"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"train_micro_batch_size_per_gpu"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"gradient_accumulation_steps"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">10</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"steps_per_print"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">2000000</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <ol start="3" data-svelte-h="svelte-czp5cr"><li>Output of <code>accelerate launch test.py</code>:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->ValueError: When using `deepspeed_config_file`, the following accelerate config variables will be ignored: | |
| [<span class="hljs-string">'gradient_accumulation_steps'</span>, <span class="hljs-string">'gradient_clipping'</span>, <span class="hljs-string">'zero_stage'</span>, <span class="hljs-string">'offload_optimizer_device'</span>, <span class="hljs-string">'offload_param_device'</span>, | |
| <span class="hljs-string">'zero3_save_16bit_model'</span>, <span class="hljs-string">'mixed_precision'</span>]. | |
| Please specify them appropriately <span class="hljs-keyword">in</span> the DeepSpeed config file. | |
| If you are using an accelerate config file, remove other config variables mentioned <span class="hljs-keyword">in</span> the above specified list. | |
| The easiest method is to create a new config following the questionnaire via `accelerate config`. | |
| It will only ask <span class="hljs-keyword">for</span> the necessary config variables when using `deepspeed_config_file`.<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-57upna"><strong>Scenario 2</strong>: Use the solution suggested by the error to create a new accelerate config and check that the ambiguity error is no longer thrown.</p> <ol data-svelte-h="svelte-jop1mq"><li>Run <code>accelerate config</code>:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->$ accelerate config | |
| ------------------------------------------------------------------------------------------------------------------------------- | |
| In <span class="hljs-built_in">which</span> compute environment are you running? | |
| This machine | |
| ------------------------------------------------------------------------------------------------------------------------------- | |
| Which <span class="hljs-built_in">type</span> of machine are you using? | |
| multi-GPU | |
| How many different machines will you use (use more than 1 <span class="hljs-keyword">for</span> multi-node training)? [1]: | |
| Do you wish to optimize your script with torch dynamo?[<span class="hljs-built_in">yes</span>/NO]: | |
| Do you want to use DeepSpeed? [<span class="hljs-built_in">yes</span>/NO]: <span class="hljs-built_in">yes</span> | |
| Do you want to specify a json file to a DeepSpeed config? [<span class="hljs-built_in">yes</span>/NO]: <span class="hljs-built_in">yes</span> | |
| Please enter the path to the json DeepSpeed config file: ds_config.json | |
| Do you want to <span class="hljs-built_in">enable</span> `deepspeed.zero.Init` when using ZeRO Stage-3 <span class="hljs-keyword">for</span> constructing massive models? [<span class="hljs-built_in">yes</span>/NO]: <span class="hljs-built_in">yes</span> | |
| How many GPU(s) should be used <span class="hljs-keyword">for</span> distributed training? [1]:4 | |
| accelerate configuration saved at ds_config_sample.yaml<!-- HTML_TAG_END --></pre></div> <ol start="2" data-svelte-h="svelte-w41hdb"><li>Content of the <code>accelerate</code> config:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-attr">compute_environment:</span> <span class="hljs-string">LOCAL_MACHINE</span> | |
| <span class="hljs-attr">deepspeed_config:</span> | |
| <span class="hljs-attr">deepspeed_config_file:</span> <span class="hljs-string">ds_config.json</span> | |
| <span class="hljs-attr">zero3_init_flag:</span> <span class="hljs-literal">true</span> | |
| <span class="hljs-attr">distributed_type:</span> <span class="hljs-string">DEEPSPEED</span> | |
| <span class="hljs-attr">downcast_bf16:</span> <span class="hljs-string">'no'</span> | |
| <span class="hljs-attr">dynamo_backend:</span> <span class="hljs-string">'NO'</span> | |
| <span class="hljs-attr">fsdp_config:</span> {} | |
| <span class="hljs-attr">machine_rank:</span> <span class="hljs-number">0</span> | |
| <span class="hljs-attr">main_training_function:</span> <span class="hljs-string">main</span> | |
| <span class="hljs-attr">megatron_lm_config:</span> {} | |
| <span class="hljs-attr">num_machines:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">num_processes:</span> <span class="hljs-number">4</span> | |
| <span class="hljs-attr">rdzv_backend:</span> <span class="hljs-string">static</span> | |
| <span class="hljs-attr">same_network:</span> <span class="hljs-literal">true</span> | |
| <span class="hljs-attr">use_cpu:</span> <span class="hljs-literal">false</span><!-- HTML_TAG_END --></pre></div> <ol start="3" data-svelte-h="svelte-czp5cr"><li>Output of <code>accelerate launch test.py</code>:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->Distributed environment: DEEPSPEED Backend: nccl | |
| Num processes: 4 | |
| Process index: 0 | |
| Local process index: 0 | |
| Device: cuda:0 | |
| Mixed precision <span class="hljs-built_in">type</span>: bf16 | |
| ds_config: {<span class="hljs-string">'bf16'</span>: {<span class="hljs-string">'enabled'</span>: True}, <span class="hljs-string">'zero_optimization'</span>: {<span class="hljs-string">'stage'</span>: 3, <span class="hljs-string">'stage3_gather_16bit_weights_on_model_save'</span>: False, <span class="hljs-string">'offload_optimizer'</span>: {<span class="hljs-string">'device'</span>: <span class="hljs-string">'none'</span>}, <span class="hljs-string">'offload_param'</span>: {<span class="hljs-string">'device'</span>: <span class="hljs-string">'none'</span>}}, <span class="hljs-string">'gradient_clipping'</span>: 1.0, <span class="hljs-string">'train_batch_size'</span>: <span class="hljs-string">'auto'</span>, <span class="hljs-string">'train_micro_batch_size_per_gpu'</span>: <span class="hljs-string">'auto'</span>, <span class="hljs-string">'gradient_accumulation_steps'</span>: 10, <span class="hljs-string">'steps_per_print'</span>: inf, <span class="hljs-string">'fp16'</span>: {<span class="hljs-string">'enabled'</span>: False}}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-s6le15"><strong>Scenario 3</strong>: Set the <code>accelerate launch</code> command arguments related to DeepSpeed to <code>"auto"</code> in the DeepSpeed configuration file and check that things work as expected.</p> <ol data-svelte-h="svelte-q77lu7"><li>New <code>ds_config.json</code> with <code>"auto"</code> for the <code>accelerate launch</code> DeepSpeed command arguments:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"bf16"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"enabled"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"zero_optimization"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"stage"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"stage3_gather_16bit_weights_on_model_save"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"offload_optimizer"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"device"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"offload_param"</span><span class="hljs-punctuation">:</span> <span class="hljs-punctuation">{</span> | |
| <span class="hljs-attr">"device"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span> | |
| <span class="hljs-punctuation">}</span> | |
| <span class="hljs-punctuation">}</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"gradient_clipping"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"train_batch_size"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"train_micro_batch_size_per_gpu"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"gradient_accumulation_steps"</span><span class="hljs-punctuation">:</span> <span class="hljs-string">"auto"</span><span class="hljs-punctuation">,</span> | |
| <span class="hljs-attr">"steps_per_print"</span><span class="hljs-punctuation">:</span> <span class="hljs-number">2000000</span> | |
| <span class="hljs-punctuation">}</span><!-- HTML_TAG_END --></pre></div> <ol start="2" data-svelte-h="svelte-18b0p9"><li>Output of <code>accelerate launch --mixed_precision="fp16" --zero_stage=3 --gradient_accumulation_steps=5 --gradient_clipping=1.0 --offload_param_device="cpu" --offload_optimizer_device="nvme" --zero3_save_16bit_model="true" test.py</code>:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->Distributed environment: DEEPSPEED Backend: nccl | |
| Num processes: 4 | |
| Process index: 0 | |
| Local process index: 0 | |
| Device: cuda:0 | |
| Mixed precision <span class="hljs-built_in">type</span>: fp16 | |
| ds_config: {<span class="hljs-string">'bf16'</span>: {<span class="hljs-string">'enabled'</span>: False}, <span class="hljs-string">'zero_optimization'</span>: {<span class="hljs-string">'stage'</span>: 3, <span class="hljs-string">'stage3_gather_16bit_weights_on_model_save'</span>: True, <span class="hljs-string">'offload_optimizer'</span>: {<span class="hljs-string">'device'</span>: <span class="hljs-string">'nvme'</span>}, <span class="hljs-string">'offload_param'</span>: {<span class="hljs-string">'device'</span>: <span class="hljs-string">'cpu'</span>}}, <span class="hljs-string">'gradient_clipping'</span>: 1.0, <span class="hljs-string">'train_batch_size'</span>: <span class="hljs-string">'auto'</span>, <span class="hljs-string">'train_micro_batch_size_per_gpu'</span>: <span class="hljs-string">'auto'</span>, <span class="hljs-string">'gradient_accumulation_steps'</span>: 5, <span class="hljs-string">'steps_per_print'</span>: inf, <span class="hljs-string">'fp16'</span>: {<span class="hljs-string">'enabled'</span>: True, <span class="hljs-string">'auto_cast'</span>: True}}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-pxb0rv"><strong>Note</strong>:</p> <ol data-svelte-h="svelte-1x5f67y"><li>Remaining <code>"auto"</code> values are handled in <code>accelerator.prepare()</code> call as explained in point 2 of | |
| <code>Important code changes when using DeepSpeed Config File</code>.</li> <li>Only when <code>gradient_accumulation_steps</code> is <code>auto</code>, the value passed while creating <code>Accelerator</code> object via <code>Accelerator(gradient_accumulation_steps=k)</code> will be used. When using DeepSpeed Plugin, the value from it will be used and it will overwrite the value passed while creating Accelerator object.</li></ol> <h2 class="relative group"><a id="saving-and-loading" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#saving-and-loading"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Saving and loading</span></h2> <ol><li data-svelte-h="svelte-1ix3dy3"><p>Saving and loading of models is unchanged for ZeRO Stage-1 and Stage-2.</p></li> <li><p data-svelte-h="svelte-1vo2i9p">under ZeRO Stage-3, <code>state_dict</code> contains just the placeholders since the model weights are partitioned across multiple GPUs. | |
| ZeRO Stage-3 has 2 options:</p> <p data-svelte-h="svelte-hk8fhr">a. Saving the entire 16bit model weights so that they can be loaded directly later on using <code>model.load_state_dict(torch.load("pytorch_model.bin"))</code>. | |
| For this, either set <code>zero_optimization.stage3_gather_16bit_weights_on_model_save</code> to True in DeepSpeed Config file or set | |
| <code>zero3_save_16bit_model</code> to True in DeepSpeed Plugin. | |
| <strong>Note that this option requires consolidation of the weights on one GPU, so it can be slow and memory demanding; only use this feature when needed.</strong> | |
| Below is the snippet from <code>examples/by_feature/deepspeed_with_config_support.py</code> showing this:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->unwrapped_model = accelerator.unwrap_model(model) | |
| <span class="hljs-comment"># New Code #</span> | |
| <span class="hljs-comment"># Saves the whole/unpartitioned fp16 model when in ZeRO Stage-3 to the output directory if</span> | |
| <span class="hljs-comment"># `stage3_gather_16bit_weights_on_model_save` is True in DeepSpeed Config file or</span> | |
| <span class="hljs-comment"># `zero3_save_16bit_model` is True in DeepSpeed Plugin.</span> | |
| <span class="hljs-comment"># For Zero Stages 1 and 2, models are saved as usual in the output directory.</span> | |
| <span class="hljs-comment"># The model name saved is `pytorch_model.bin`</span> | |
| unwrapped_model.save_pretrained( | |
| args.output_dir, | |
| is_main_process=accelerator.is_main_process, | |
| save_function=accelerator.save, | |
| state_dict=accelerator.get_state_dict(model), | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ddq7v6">b. To get 32bit weights, first save the model using <code>model.save_checkpoint()</code>. | |
| Below is the snippet from <code>examples/by_feature/deepspeed_with_config_support.py</code> showing this:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->success = model.save_checkpoint(PATH, ckpt_id, checkpoint_state_dict) | |
| status_msg = <span class="hljs-string">f"checkpointing: PATH=<span class="hljs-subst">{PATH}</span>, ckpt_id=<span class="hljs-subst">{ckpt_id}</span>"</span> | |
| <span class="hljs-keyword">if</span> success: | |
| logging.info(<span class="hljs-string">f"Success <span class="hljs-subst">{status_msg}</span>"</span>) | |
| <span class="hljs-keyword">else</span>: | |
| logging.warning(<span class="hljs-string">f"Failure <span class="hljs-subst">{status_msg}</span>"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-hmvopk">This will create the ZeRO model and optimizer partitions along with the <code>zero_to_fp32.py</code> script in the checkpoint directory. | |
| You can use this script to do offline consolidation. | |
| It requires no configuration files or GPUs. Here is an example of its usage:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->$ <span class="hljs-built_in">cd</span> /path/to/checkpoint_dir | |
| $ ./zero_to_fp32.py . pytorch_model.bin | |
| Processing zero checkpoint at global_step1 | |
| Detected checkpoint of <span class="hljs-built_in">type</span> zero stage 3, world_size: 2 | |
| Saving fp32 state dict to pytorch_model.bin (total_numel=60506624)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1hbu42f">To get 32bit model for saving/inference, you can perform:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> deepspeed.utils.zero_to_fp32 <span class="hljs-keyword">import</span> load_state_dict_from_zero_checkpoint | |
| unwrapped_model = accelerator.unwrap_model(model) | |
| fp32_model = load_state_dict_from_zero_checkpoint(unwrapped_model, checkpoint_dir)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-h6u0m6">If you are only interested in the <code>state_dict</code>, you can do the following:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> deepspeed.utils.zero_to_fp32 <span class="hljs-keyword">import</span> get_fp32_state_dict_from_zero_checkpoint | |
| state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13dj8kw">Note that all these functions require ~2x memory (general RAM) of the size of the final checkpoint.</p></li></ol> <h2 class="relative group"><a id="zero-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#zero-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>ZeRO Inference</span></h2> <p data-svelte-h="svelte-5ua48b">DeepSpeed ZeRO Inference supports ZeRO stage 3 with ZeRO-Infinity. | |
| It uses the same ZeRO protocol as training, but it doesn’t use an optimizer or an LR scheduler, and only stage 3 is relevant. | |
| With accelerate integration, you just need to prepare the model and dataloader as shown below:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model, eval_dataloader = accelerator.prepare(model, eval_dataloader)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="few-caveats-to-be-aware-of" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#few-caveats-to-be-aware-of"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 
1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Few caveats to be aware of</span></h2> <ol data-svelte-h="svelte-rz68fo"><li>Current integration doesn’t support Pipeline Parallelism of DeepSpeed.</li> <li>Current integration doesn’t support <code>mpu</code>, limiting the tensor parallelism which is supported in Megatron-LM.</li> <li>Current integration doesn’t support multiple models.</li></ol> <h2 class="relative group"><a id="multi-node-deepspeed" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#multi-node-deepspeed"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Multi-node DeepSpeed</span></h2> <p data-svelte-h="svelte-1fkk50y">DeepSpeed supports multi-node inference and training over a 
variety of different launchers. You can specify a different launcher by setting the <code>deepspeed_multinode_launcher</code> config in the CLI or in the DeepSpeed config file.</p> <p data-svelte-h="svelte-1fjh6gc">Currently, accelerate supports passing configuration for the following DeepSpeed multi-node launchers: <code>pdsh</code> (default), <code>standard</code>, <code>openmpi</code>, <code>mvapich</code>, <code>mpich</code>, <code>slurm</code>, <code>nossh</code> (requires DeepSpeed >= 0.14.5).</p> <p data-svelte-h="svelte-lzjsvg">Please read the <a href="https://www.deepspeed.ai/getting-started/#resource-configuration-multi-node" rel="nofollow">DeepSpeed documentation</a> for more information on the different launchers. By default, DeepSpeed will attempt to use passwordless SSH from the main machine node to the other nodes to perform the launcher command. In this configuration, the accelerate launch command only needs to be run on the main node. If using the <code>nossh</code> launcher, you will need to run the accelerate launch command on every node using copied configuration.</p> <h2 class="relative group"><a id="deepspeed-resources" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deepspeed-resources"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 
56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>DeepSpeed Resources</span></h2> <p data-svelte-h="svelte-1aiu2sl">The documentation for the internals related to deepspeed can be found <a href="../package_reference/deepspeed">here</a>.</p> <ul data-svelte-h="svelte-9racni"><li><a href="https://github.com/deepspeedai/DeepSpeed" rel="nofollow">Project’s github</a></li> <li><a href="https://www.deepspeed.ai/getting-started/" rel="nofollow">Usage docs</a></li> <li><a href="https://deepspeed.readthedocs.io/en/latest/index.html" rel="nofollow">API docs</a></li> <li><a href="https://www.microsoft.com/en-us/research/search/?q=deepspeed" rel="nofollow">Blog posts</a></li></ul> <p data-svelte-h="svelte-1pnpyer">Papers:</p> <ul data-svelte-h="svelte-29gk9s"><li><a href="https://huggingface.co/papers/1910.02054" rel="nofollow">ZeRO: Memory Optimizations Toward Training Trillion Parameter Models</a></li> <li><a href="https://huggingface.co/papers/2101.06840" rel="nofollow">ZeRO-Offload: Democratizing Billion-Scale Model Training</a></li> <li><a href="https://huggingface.co/papers/2104.07857" rel="nofollow">ZeRO-Infinity: Breaking the GPU Memory Wall for Extreme Scale Deep Learning</a></li> <li><a href="https://huggingface.co/papers/2306.10209" rel="nofollow">ZeRO++: Extremely Efficient Collective Communication for Giant Model Training</a></li></ul> <p data-svelte-h="svelte-1feo4z0">Finally, please, remember that <code>Accelerate</code> only integrates DeepSpeed, therefore if you | |
| have any problems or questions with regards to DeepSpeed usage, please, file an issue with <a href="https://github.com/deepspeedai/DeepSpeed/issues" rel="nofollow">DeepSpeed GitHub</a>.</p> <blockquote class="tip"><p data-svelte-h="svelte-we3qam">For those interested in the similarities and differences between FSDP and DeepSpeed, please check out the <a href="../concept_guides/fsdp_and_deepspeed">concept guide here</a>!</p></blockquote> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/accelerate/blob/main/docs/source/usage_guides/deepspeed.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_1q7nz6m = { | |
| assets: "/docs/accelerate/pr_4021/en", | |
| base: "/docs/accelerate/pr_4021/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/accelerate/pr_4021/en/_app/immutable/entry/start.8a49e72b.js"), | |
| import("/docs/accelerate/pr_4021/en/_app/immutable/entry/app.1df4d18e.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 42], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 120 kB
- Xet hash:
- 2cc9e2bdcce82e3de85ac68b9cd673ea543c375c57ee63e768e6f3251d3c0713
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.