Buckets:
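| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Low Precision Training Methods&quot;,&quot;local&quot;:&quot;low-precision-training-methods&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;What training on FP8 means&quot;,&quot;local&quot;:&quot;what-training-on-fp8-means&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Configuring the Accelerator&quot;,&quot;local&quot;:&quot;configuring-the-accelerator&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Configuring MS-AMP&quot;,&quot;local&quot;:&quot;configuring-ms-amp&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Configuring TransformersEngine&quot;,&quot;local&quot;:&quot;configuring-transformersengine&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Configuring torchao&quot;,&quot;local&quot;:&quot;configuring-torchao&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Example Zoo&quot;,&quot;local&quot;:&quot;example-zoo&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Further Reading&quot;,&quot;local&quot;:&quot;further-reading&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"> | |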
| <link href="/docs/accelerate/pr_4021/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/entry/start.8a49e72b.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/scheduler.b9285784.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/singletons.7547c222.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/index.6d423e5c.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/paths.d42c9205.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/entry/app.1df4d18e.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/preload-helper.b0bd19d1.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/index.26bc89a1.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/nodes/0.0e7c56e8.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/nodes/51.ba6185aa.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/Tip.e4eba3d6.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.7a0ae628.js"> | |
| <link rel="modulepreload" href="/docs/accelerate/pr_4021/en/_app/immutable/chunks/CodeBlock.844ff9c3.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Low Precision Training Methods","local":"low-precision-training-methods","sections":[{"title":"What training on FP8 means","local":"what-training-on-fp8-means","sections":[],"depth":2},{"title":"Configuring the Accelerator","local":"configuring-the-accelerator","sections":[],"depth":2},{"title":"Configuring MS-AMP","local":"configuring-ms-amp","sections":[],"depth":2},{"title":"Configuring TransformersEngine","local":"configuring-transformersengine","sections":[],"depth":2},{"title":"Configuring torchao","local":"configuring-torchao","sections":[],"depth":2},{"title":"Example Zoo","local":"example-zoo","sections":[],"depth":2},{"title":"Further Reading","local":"further-reading","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path 
d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="low-precision-training-methods" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#low-precision-training-methods"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Low Precision Training Methods</span></h1> <p 
data-svelte-h="svelte-1py1qst">Accelerate provides integrations to train on lower precision methods using specified supported hardware through the <code>TransformersEngine</code>, <code>MS-AMP</code>, and <code>torchao</code> packages. This documentation will help guide you through what hardware is supported, how to configure your <a href="/docs/accelerate/pr_4021/en/package_reference/accelerator#accelerate.Accelerator">Accelerator</a> to leverage the low precision methods, and what you can expect when training.</p> <h2 class="relative group"><a id="what-training-on-fp8-means" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#what-training-on-fp8-means"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>What training on FP8 means</span></h2> <p data-svelte-h="svelte-wuhpuo">To explore more of the nitty-gritty in training in FP8 with PyTorch and Accelerate, check out the <a href="../concept_guides/low_precision_training">concept_guide</a> on why this can be difficult. But essentially rather than training in BF16, some (or all) aspects of training a model can be performed using 8 bits instead of 16. 
The challenge is doing so without degrading final performance.</p> <p data-svelte-h="svelte-10cwb11">This is only enabled on specific NVIDIA hardware, namely:</p> <ul data-svelte-h="svelte-5d1df8"><li>Anything after the 3000 series consumer graphics cards (such as the 4090)</li> <li>Hopper-based GPU architectures (such as the <code>H100</code> and <code>H200</code>)</li></ul> <p data-svelte-h="svelte-149dy0g">What this will result in is some reduction in the memory used (as we’ve cut the needed memory in half for some parts of training) and an increase in throughput <em>should</em> be seen as well for larger models that can replace certain layers with FP8-enabled ones.</p> <h2 class="relative group"><a id="configuring-the-accelerator" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#configuring-the-accelerator"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Configuring the Accelerator</span></h2> <p data-svelte-h="svelte-mfpbqx">Currently two actively maintained backends for FP8 are supported (<code>TransformersEngine</code> and <code>torchao</code>), each with different capabilities and configurations. 
A legacy <code>MS-AMP</code> backend also exists but is no longer recommended (see <a href="#configuring-ms-amp">below</a> for details).</p> <p data-svelte-h="svelte-d1et9p">To use either, the same core API is used. Just pass <code>mixed_precision="fp8"</code> to either the <a href="/docs/accelerate/pr_4021/en/package_reference/accelerator#accelerate.Accelerator">Accelerator</a>, during <code>accelerate config</code> when prompted about mixed precision, or as part of your <code>config.yaml</code> file in the <code>mixed_precision</code> key:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->from accelerate import Accelerator | |
| <span class="hljs-attribute">accelerator</span> <span class="hljs-operator">=</span> Accelerator(mixed_precision<span class="hljs-operator">=</span><span class="hljs-string">"fp8"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1jfw9hc">To specify a backend (and customize other parts of the FP8 mixed precision setup), you can utilize one of the <code>RecipeKwargs</code> dataclasses such as <code>utils.AORecipeKwargs</code>, <code>utils.TERecipeKwargs</code>, or <code>utils.MSAMPRecipeKwargs</code>; you can also clarify it in your config <code>yaml</code>/during <code>accelerate launch</code>. We recommend using <code>TransformersEngine</code> or <code>torchao</code> for new projects:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> accelerate import Accelerator | |
| <span class="hljs-keyword">from</span> accelerate.utils import TERecipeKwargs, AORecipeKwargs | |
| <span class="hljs-comment"># Use TransformersEngine</span> | |
| kwargs = [TERecipeKwargs()] | |
| <span class="hljs-comment"># Or to use torchao</span> | |
| <span class="hljs-comment"># kwargs = [AORecipeKwargs()]</span> | |
| accelerator = Accelerator(<span class="hljs-attribute">mixed_precision</span>=<span class="hljs-string">"fp8"</span>, <span class="hljs-attribute">kwarg_handlers</span>=kwargs)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-attr">mixed_precision:</span> <span class="hljs-string">fp8</span> | |
| <span class="hljs-attr">fp8_config:</span> | |
| <span class="hljs-attr">amax_compute_algo:</span> <span class="hljs-string">max</span> | |
| <span class="hljs-attr">amax_history_len:</span> <span class="hljs-number">1024</span> | |
| <span class="hljs-attr">backend:</span> <span class="hljs-string">TE</span> | |
| <span class="hljs-attr">fp8_format:</span> <span class="hljs-string">HYBRID</span> | |
| <span class="hljs-attr">interval:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">margin:</span> <span class="hljs-number">0</span> | |
| <span class="hljs-attr">override_linear_precision:</span> <span class="hljs-string">(false,</span> <span class="hljs-literal">false</span><span class="hljs-string">,</span> <span class="hljs-literal">false</span><span class="hljs-string">)</span> | |
| <span class="hljs-attr">use_autocast_during_eval:</span> <span class="hljs-literal">false</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="configuring-ms-amp" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#configuring-ms-amp"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Configuring MS-AMP</span></h2> <blockquote class="warning"><p data-svelte-h="svelte-17ommj7"><strong>⚠️ Deprecated / Unmaintained:</strong> MS-AMP is no longer actively maintained by Microsoft. 
The <a href="https://github.com/Azure/MS-AMP" rel="nofollow">MS-AMP repository</a> has not received updates since 2023 and has known compatibility issues:</p> <ul data-svelte-h="svelte-fabj8h"><li>Requires CUDA 11.x (does not support CUDA 12.x+)</li> <li>Requires older NCCL versions incompatible with recent PyTorch releases</li> <li>Does not support recent PyTorch versions (2.2+)</li></ul> <p data-svelte-h="svelte-1v3pi2f"><strong>We strongly recommend using <a href="#configuring-transformersengine"><code>TransformersEngine</code></a> or <a href="#configuring-torchao"><code>torchao</code></a> instead for all new and existing FP8 training workflows.</strong> Both are actively maintained and support modern CUDA/PyTorch versions. Native PyTorch FP8 support via <code>torchao</code> is particularly promising as a vendor-neutral solution.</p> <p data-svelte-h="svelte-1bl9wit">The MS-AMP backend is retained in Accelerate for legacy compatibility but may be removed in a future release.</p></blockquote> <p data-svelte-h="svelte-1mf0zic"><code>MS-AMP</code> has a single configuration argument: the optimization level.</p> <p data-svelte-h="svelte-11bftkh">Currently two levels of optimization are supported in the Accelerate integration, <code>"O1"</code> and <code>"O2"</code> (using the letter ‘o’, not zero).</p> <ul data-svelte-h="svelte-ha185h"><li><code>"O1"</code> will cast the weight gradients and <code>all_reduce</code> communications to happen in 8-bit, while the rest are done in 16 bit. This reduces the general GPU memory usage and speeds up communication bandwidths.</li> <li><code>"O2"</code> will also cast first-order optimizer states into 8 bit, while the second order states are in FP16. (Currently just the <code>Adam</code> optimizer is supported). 
This tries its best to minimize final accuracy degradation and will save the highest potential memory.</li></ul> <p data-svelte-h="svelte-wx6vs8">To specify an optimization level, pass it to the <code>FP8KwargsHandler</code> by setting the <code>optimization_level</code> argument:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> accelerate import Accelerator | |
| <span class="hljs-keyword">from</span> accelerate.utils import FP8RecipeKwargs | |
| kwargs = [FP8RecipeKwargs(<span class="hljs-attribute">backend</span>=<span class="hljs-string">"msamp"</span>, <span class="hljs-attribute">optimization_level</span>=<span class="hljs-string">"O2"</span>)] | |
| accelerator = Accelerator(<span class="hljs-attribute">mixed_precision</span>=<span class="hljs-string">"fp8"</span>, <span class="hljs-attribute">kwarg_handlers</span>=kwargs)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-vo4vij">Or during <code>accelerate launch</code> via <code>--fp8_backend=msamp --fp8_opt_level=O2</code></p> <p data-svelte-h="svelte-1n0fir7">Similarly this can be set in your <code>config.yaml</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-symbol">mixed_precision:</span> fp8 | |
| <span class="hljs-symbol">fp8_config:</span> | |
| <span class="hljs-symbol"> backend:</span> MSAMP | |
| <span class="hljs-symbol"> opt_level:</span> O2<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="configuring-transformersengine" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#configuring-transformersengine"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Configuring TransformersEngine</span></h2> <p data-svelte-h="svelte-1tlihxp">TransformersEngine has many options for customizing how and what FP8 calculations are performed. 
A full list of supported arguments and what they mean are available in <a href="https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/api/common.html" rel="nofollow">NVIDIA’s documentation</a>, however they are restated as part of <code>FP8KwargsHandler</code>’s docstring for your convenience.</p> <p data-svelte-h="svelte-wvn4fr">Accelerate tries to set sensible defaults, but exploring and tweaking the various parameters yourself can lead to better performance potentially.</p> <p data-svelte-h="svelte-8khoko">To use it, specify <code>backend="te"</code> and modify any of the arguments you want as part of your kwarg handler:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> accelerate import Accelerator | |
| <span class="hljs-keyword">from</span> accelerate.utils import FP8RecipeKwargs | |
| kwargs = [FP8RecipeKwargs(<span class="hljs-attribute">backend</span>=<span class="hljs-string">"te"</span>, <span class="hljs-built_in">..</span>.)] | |
| accelerator = Accelerator(<span class="hljs-attribute">mixed_precision</span>=<span class="hljs-string">"fp8"</span>, <span class="hljs-attribute">kwarg_handlers</span>=kwargs)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-99qxte">Or during <code>accelerate launch</code> via <code>--fp8_backend=te ...</code>. Use <code>accelerate launch --fp8_backend=te -h</code> to see relevant arguments.</p> <p data-svelte-h="svelte-1n0fir7">Similarly, this can be set in your <code>config.yaml</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-attr">mixed_precision:</span> <span class="hljs-string">fp8</span> | |
| <span class="hljs-attr">fp8_config:</span> | |
| <span class="hljs-attr">amax_compute_algo:</span> <span class="hljs-string">max</span> | |
| <span class="hljs-attr">amax_history_len:</span> <span class="hljs-number">1024</span> | |
| <span class="hljs-attr">backend:</span> <span class="hljs-string">TE</span> | |
| <span class="hljs-attr">fp8_format:</span> <span class="hljs-string">HYBRID</span> | |
| <span class="hljs-attr">interval:</span> <span class="hljs-number">1</span> | |
| <span class="hljs-attr">margin:</span> <span class="hljs-number">0</span> | |
| <span class="hljs-attr">override_linear_precision:</span> <span class="hljs-string">(false,</span> <span class="hljs-literal">false</span><span class="hljs-string">,</span> <span class="hljs-literal">false</span><span class="hljs-string">)</span> | |
| <span class="hljs-attr">use_autocast_during_eval:</span> <span class="hljs-literal">false</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="configuring-torchao" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#configuring-torchao"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Configuring torchao</span></h2> <p data-svelte-h="svelte-13u4x7b"><code>torchao</code> is a <a href="https://github.com/pytorch/ao/tree/main/torchao/float8" rel="nofollow">PyTorch-driven</a> hackable FP8 backend, aiming to be more approachable than the prior two engines. One of the core differences with <code>ao</code> compared to the prior two is that, for numerical stability, it is generally better to keep the first <em>and</em> last layers in the model at the regular precision (be it FP32 or BF16) and quantize only the other layers down to FP8. 
As a result, a config for <code>ao</code> looks a bit different:</p> <blockquote data-svelte-h="svelte-rg03zf"><p>Note: this API is experimental and is subject to change.</p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> accelerate import Accelerator | |
| <span class="hljs-keyword">from</span> accelerate.utils import AORecipeKwargs, TorchDynamoPlugin, FullyShardedDataParallelPlugin | |
| <span class="hljs-keyword">from</span> torchao.float8 import Float8LinearConfig | |
| fsdp2_plugin = FullyShardedDataParallelPlugin( | |
| <span class="hljs-attribute">fsdp_version</span>=2, | |
| <span class="hljs-attribute">cpu_ram_efficient_loading</span>=<span class="hljs-literal">False</span>, # CPU RAM efficient loading CANNOT work with fp8 torchao | |
| <span class="hljs-attribute">fsdp_auto_wrap_policy</span>=<span class="hljs-string">"TRANSFORMER_BASED_WRAP"</span>, | |
| ) | |
| dynamo_plugin = TorchDynamoPlugin( | |
| <span class="hljs-attribute">backend</span>=<span class="hljs-string">"inductor"</span>, | |
| <span class="hljs-attribute">use_regional_compilation</span>=<span class="hljs-literal">True</span>, | |
| ) | |
| fp8_config = Float8LinearConfig( | |
| <span class="hljs-attribute">enable_fsdp_float8_all_gather</span>=<span class="hljs-literal">True</span>, # Use FP8 all_gather <span class="hljs-keyword">in</span> FSDP2 | |
| <span class="hljs-attribute">pad_inner_dim</span>=<span class="hljs-literal">True</span>, | |
| ) | |
| kwargs = [AORecipeKwargs( | |
| <span class="hljs-attribute">config</span>=fp8_config | |
| )] | |
| accelerator = Accelerator( | |
| <span class="hljs-attribute">mixed_precision</span>=<span class="hljs-string">"fp8"</span>, | |
| <span class="hljs-attribute">fsdp_plugin</span>=fsdp2_plugin, | |
| <span class="hljs-attribute">dynamo_plugin</span>=dynamo_plugin, | |
| <span class="hljs-attribute">kwarg_handlers</span>=kwargs, | |
| )<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1qvuvpw">Or during <code>accelerate launch</code> via <code>--fp8_backend=ao ...</code>. Use <code>accelerate launch --fp8_backend=ao -h</code> to see relevant arguments.</p> <p data-svelte-h="svelte-12gtxdw">Similarly, this can be set in <code>config.yaml</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-attr">mixed_precision:</span> <span class="hljs-string">fp8</span> | |
| <span class="hljs-attr">fsdp_config:</span> | |
| <span class="hljs-attr">fsdp_auto_wrap_policy:</span> <span class="hljs-string">TRANSFORMER_BASED_WRAP</span> | |
| <span class="hljs-attr">fsdp_cpu_ram_efficient_loading:</span> <span class="hljs-literal">false</span> | |
| <span class="hljs-attr">fsdp_version:</span> <span class="hljs-number">2</span> | |
| <span class="hljs-attr">fp8_config:</span> | |
| <span class="hljs-attr">backend:</span> <span class="hljs-string">AO</span> | |
| <span class="hljs-attr">pad_inner_dim:</span> <span class="hljs-literal">true</span> | |
| <span class="hljs-attr">enable_fsdp_float8_all_gather:</span> <span class="hljs-literal">true</span> | |
| <span class="hljs-attr">dynamo_config:</span> | |
| <span class="hljs-attr">dynamo_backend:</span> <span class="hljs-string">INDUCTOR</span> | |
| <span class="hljs-attr">dynamo_use_regional_compilation:</span> <span class="hljs-literal">true</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1qqyqot">To learn more about the specific parameters to be used, please see the official <code>torchao</code> repo.</p> <h2 class="relative group"><a id="example-zoo" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#example-zoo"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Example Zoo</span></h2> <p data-svelte-h="svelte-1ay0trc">We have examples showcasing training with FP8 both with accelerate and its underlying implementation available in the accelerate repo. | |
| Currently we support scripts showcasing:</p> <ul data-svelte-h="svelte-1affbo7"><li>Single GPU</li> <li>Distributed Data Parallelism (Multi-GPU)</li> <li>Fully Sharded Data Parallelism</li> <li>DeepSpeed ZeRO 1 through 3</li></ul> <p data-svelte-h="svelte-sau342">Find out more <a href="https://github.com/huggingface/accelerate/tree/main/benchmarks/fp8" rel="nofollow">here</a>.</p> <h2 class="relative group"><a id="further-reading" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#further-reading"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Further Reading</span></h2> <p data-svelte-h="svelte-t5s4ol">To learn more about training in FP8 please check out the following resources:</p> <ul data-svelte-h="svelte-1yn0lu3"><li><a href="../concept_guides/low_precision_training">Our concept guide</a>, which goes into more detail about TransformersEngine, torchao, and MS-AMP</li> <li><a href="https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/api/common.html" rel="nofollow">The <code>transformers-engine</code> documentation</a></li> <li><a href="https://github.com/pytorch/ao/tree/main/torchao/float8" rel="nofollow">The 
<code>torchao</code> documentation</a></li> <li><a href="https://azure.github.io/MS-AMP/docs/" rel="nofollow">The <code>MS-AMP</code> documentation</a> (⚠️ no longer maintained)</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/accelerate/blob/main/docs/source/usage_guides/low_precision_training.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_1q7nz6m = { | |
| assets: "/docs/accelerate/pr_4021/en", | |
| base: "/docs/accelerate/pr_4021/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/accelerate/pr_4021/en/_app/immutable/entry/start.8a49e72b.js"), | |
| import("/docs/accelerate/pr_4021/en/_app/immutable/entry/app.1df4d18e.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 51], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 42.7 kB
- Xet hash:
- 2d15f90c51ce1a2af1feda5a6d97691ffa5e9abc7994b6e1ba7253ed3be55ced
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.