Buckets:

hf-doc-build/doc-dev / transformers /main /ja /perf_train_cpu_many.html
rtrm's picture
download
raw
27.5 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Efficient Training on Multiple CPUs&quot;,&quot;local&quot;:&quot;efficient-training-on-multiple-cpus&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Intel® oneCCL Bindings for PyTorch&quot;,&quot;local&quot;:&quot;intel-oneccl-bindings-for-pytorch&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Intel® oneCCL Bindings for PyTorch installation:&quot;,&quot;local&quot;:&quot;intel-oneccl-bindings-for-pytorch-installation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Intel® MPI library&quot;,&quot;local&quot;:&quot;intel-mpi-library&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;IPEX installation:&quot;,&quot;local&quot;:&quot;ipex-installation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Usage in Trainer&quot;,&quot;local&quot;:&quot;usage-in-trainer&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/main/ja/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/entry/start.1486e459.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/scheduler.9bc65507.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/singletons.eee55cbf.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/index.3b203c72.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/paths.59da1547.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/entry/app.d9ae818f.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/index.707bf1b6.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/nodes/0.c06aa070.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/nodes/118.db8534e8.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/Tip.c2ecdbf4.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/CodeBlock.54a9f38d.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/EditOnGithub.922df6ba.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Efficient Training on Multiple CPUs&quot;,&quot;local&quot;:&quot;efficient-training-on-multiple-cpus&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Intel® oneCCL Bindings for PyTorch&quot;,&quot;local&quot;:&quot;intel-oneccl-bindings-for-pytorch&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Intel® oneCCL Bindings for PyTorch installation:&quot;,&quot;local&quot;:&quot;intel-oneccl-bindings-for-pytorch-installation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Intel® MPI library&quot;,&quot;local&quot;:&quot;intel-mpi-library&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;IPEX installation:&quot;,&quot;local&quot;:&quot;ipex-installation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Usage in Trainer&quot;,&quot;local&quot;:&quot;usage-in-trainer&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="efficient-training-on-multiple-cpus" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#efficient-training-on-multiple-cpus"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Efficient Training on Multiple CPUs</span></h1> <p data-svelte-h="svelte-x4ob9g">1つのCPUでのトレーニングが遅すぎる場合、複数のCPUを使用できます。このガイドは、PyTorchベースのDDPを使用した分散CPUトレーニングに焦点を当てています。</p> <h2 class="relative group"><a id="intel-oneccl-bindings-for-pytorch" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#intel-oneccl-bindings-for-pytorch"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Intel® oneCCL Bindings for PyTorch</span></h2> <p data-svelte-h="svelte-iaalck"><a href="https://github.com/oneapi-src/oneCCL" rel="nofollow">Intel® oneCCL</a>(集合通信ライブラリ)は、allreduce、allgather、alltoallなどの収集通信を実装した効率的な分散ディープラーニングトレーニング用のライブラリです。oneCCLの詳細については、<a href="https://spec.oneapi.com/versions/latest/elements/oneCCL/source/index.html" rel="nofollow">oneCCLドキュメント</a><a href="https://spec.oneapi.com/versions/latest/elements/oneCCL/source/index.html" rel="nofollow">oneCCL仕様</a>を参照してください。</p> <p data-svelte-h="svelte-12luyx0">モジュール<code>oneccl_bindings_for_pytorch</code>(バージョン1.12以前は<code>torch_ccl</code>)は、PyTorch C10D ProcessGroup APIを実装し、外部のProcessGroupとして動的にロードでき、現在はLinuxプラットフォームでのみ動作します。</p> <p data-svelte-h="svelte-imt8h1"><a href="https://github.com/intel/torch-ccl" rel="nofollow">torch-ccl</a>の詳細情報を確認してください。</p> <h3 class="relative group"><a id="intel-oneccl-bindings-for-pytorch-installation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#intel-oneccl-bindings-for-pytorch-installation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Intel® oneCCL Bindings for PyTorch installation:</span></h3> <p data-svelte-h="svelte-1tdwdd8">Wheelファイルは、以下のPythonバージョン用に利用可能です:</p> <table data-svelte-h="svelte-1yk106u"><thead><tr><th align="center">Extension Version</th> <th align="center">Python 3.6</th> <th align="center">Python 3.7</th> <th align="center">Python 3.8</th> <th align="center">Python 3.9</th> <th align="center">Python 3.10</th></tr></thead> <tbody><tr><td align="center">1.13.0</td> <td align="center"></td> <td align="center"></td> <td align="center"></td> <td align="center"></td> <td align="center"></td></tr> <tr><td align="center">1.12.100</td> <td align="center"></td> <td align="center"></td> <td align="center"></td> <td align="center"></td> <td align="center"></td></tr> <tr><td align="center">1.12.0</td> <td align="center"></td> <td align="center"></td> <td align="center"></td> <td align="center"></td> <td align="center"></td></tr> <tr><td align="center">1.11.0</td> <td align="center"></td> <td align="center"></td> <td align="center"></td> <td align="center"></td> <td align="center"></td></tr> <tr><td align="center">1.10.0</td> <td align="center"></td> <td align="center"></td> <td align="center"></td> <td align="center"></td> <td align="center"></td></tr></tbody></table> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install oneccl_bind_pt=={pytorch_version} -f https://developer.intel.com/ipex-whl-stable-cpu<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-gwf101">where <code>{pytorch_version}</code> should be your PyTorch version, for instance 1.13.0.
Check more approaches for <a href="https://github.com/intel/torch-ccl" rel="nofollow">oneccl_bind_pt installation</a>.
Versions of oneCCL and PyTorch must match.</p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-is6c7w">oneccl_bindings_for_pytorch 1.12.0 prebuilt wheel does not work with PyTorch 1.12.1 (it is for PyTorch 1.12.0)
PyTorch 1.12.1 should work with oneccl_bindings_for_pytorch 1.12.100</p></div> <p data-svelte-h="svelte-1o5c06n"><code>{pytorch_version}</code> は、あなたのPyTorchのバージョン(例:1.13.0)に置き換える必要があります。重要なのは、oneCCLとPyTorchのバージョンが一致していることです。<a href="https://github.com/intel/torch-ccl" rel="nofollow">oneccl_bind_ptのインストール</a>に関するさらなるアプローチを確認できます。</p> <div class="course-tip course-tip-orange bg-gradient-to-br dark:bg-gradient-to-r before:border-orange-500 dark:before:border-orange-800 from-orange-50 dark:from-gray-900 to-white dark:to-gray-950 border border-orange-50 text-orange-700 dark:text-gray-400"><p data-svelte-h="svelte-qkrsq0"><code>oneccl_bindings_for_pytorch</code>の1.12.0プリビルトホイールはPyTorch 1.12.1と互換性がありません(これはPyTorch 1.12.0用です)。PyTorch 1.12.1を使用する場合は、<code>oneccl_bindings_for_pytorch</code>バージョン1.12.100を使用する必要があります。</p></div> <h2 class="relative group"><a id="intel-mpi-library" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#intel-mpi-library"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Intel® MPI library</span></h2> <p data-svelte-h="svelte-1ozjsp6">この基準ベースのMPI実装を使用して、Intel®アーキテクチャ上で柔軟で効率的、スケーラブルなクラスタメッセージングを提供します。このコンポーネントは、Intel® oneAPI HPC Toolkitの一部です。</p> <p data-svelte-h="svelte-riba3m">oneccl_bindings_for_pytorchはMPIツールセットと一緒にインストールされます。使用する前に環境をソース化する必要があります。</p> <p data-svelte-h="svelte-324hiy">for Intel® oneCCL &gt;= 1.12.0</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->oneccl_bindings_for_pytorch_path=$(python -c <span class="hljs-string">&quot;from oneccl_bindings_for_pytorch import cwd; print(cwd)&quot;</span>)
<span class="hljs-built_in">source</span> <span class="hljs-variable">$oneccl_bindings_for_pytorch_path</span>/env/setvars.sh<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1x3l803">for Intel® oneCCL whose version &lt; 1.12.0</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->torch_ccl_path=$(python -c <span class="hljs-string">&quot;import torch; import torch_ccl; import os; print(os.path.abspath(os.path.dirname(torch_ccl.__file__)))&quot;</span>)
<span class="hljs-built_in">source</span> <span class="hljs-variable">$torch_ccl_path</span>/env/setvars.sh<!-- HTML_TAG_END --></pre></div> <h4 class="relative group"><a id="ipex-installation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#ipex-installation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>IPEX installation:</span></h4> <p data-svelte-h="svelte-g3w6te">IPEXは、Float32およびBFloat16の両方でCPUトレーニングのパフォーマンス最適化を提供します。詳細は<a href="./perf_train_cpu">こちらのシングルCPUセクション</a>をご参照ください。</p> <p data-svelte-h="svelte-p0koup">以下の「トレーナーでの使用」は、Intel® MPIライブラリでmpirunを使用する例を示しています。</p> <h2 class="relative group"><a id="usage-in-trainer" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#usage-in-trainer"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Usage in Trainer</span></h2> <p data-svelte-h="svelte-9wlwki">トレーナーでのマルチCPU分散トレーニングを有効にするために、ユーザーはコマンド引数に <strong><code>--ddp_backend ccl</code></strong> を追加する必要があります。</p> <p data-svelte-h="svelte-zu2hn2">例を見てみましょう。<a href="https://github.com/huggingface/transformers/tree/main/examples/pytorch/question-answering" rel="nofollow">質問応答の例</a></p> <p data-svelte-h="svelte-bujigo">以下のコマンドは、1つのXeonノードで2つのプロセスを使用してトレーニングを有効にします。1つのプロセスが1つのソケットで実行されます。OMP_NUM_THREADS/CCL_WORKER_COUNT変数は、最適なパフォーマンスを調整するために調整できます。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> export CCL_WORKER_COUNT=1
export MASTER_ADDR=127.0.0.1
mpirun -n 2 -genv OMP_NUM_THREADS=23 \
python3 run_qa.py \
--model_name_or_path google-bert/bert-large-uncased \
--dataset_name squad \
--do_train \
--do_eval \
--per_device_train_batch_size 12 \
--learning_rate 3e-5 \
--num_train_epochs 2 \
--max_seq_length 384 \
--doc_stride 128 \
--output_dir /tmp/debug_squad/ \
--no_cuda \
--ddp_backend ccl \
--use_ipex<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-4pe4cr">以下のコマンドは、2つのXeonプロセッサ(node0とnode1、node0をメインプロセスとして使用)で合計4つのプロセスを使用してトレーニングを有効にします。ppn(ノードごとのプロセス数)は2に設定され、1つのソケットごとに1つのプロセスが実行されます。最適なパフォーマンスを得るために、OMP_NUM_THREADS/CCL_WORKER_COUNT変数を調整できます。</p> <p data-svelte-h="svelte-1jhnnso">node0では、各ノードのIPアドレスを含む構成ファイルを作成し、その構成ファイルのパスを引数として渡す必要があります。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> cat hostfile
xxx.xxx.xxx.xxx #node0 ip
xxx.xxx.xxx.xxx #node1 ip<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-smvu0d">ノード0で次のコマンドを実行すると、ノード0とノード1で<strong>4DDP</strong>がBF16自動混合精度で有効になります。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --> export CCL_WORKER_COUNT=1
export MASTER_ADDR=xxx.xxx.xxx.xxx #node0 ip
mpirun -f hostfile -n 4 -ppn 2 \
-genv OMP_NUM_THREADS=23 \
python3 run_qa.py \
--model_name_or_path google-bert/bert-large-uncased \
--dataset_name squad \
--do_train \
--do_eval \
--per_device_train_batch_size 12 \
--learning_rate 3e-5 \
--num_train_epochs 2 \
--max_seq_length 384 \
--doc_stride 128 \
--output_dir /tmp/debug_squad/ \
--no_cuda \
--ddp_backend ccl \
--use_ipex \
--bf16<!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ja/perf_train_cpu_many.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_jement = {
assets: "/docs/transformers/main/ja",
base: "/docs/transformers/main/ja",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/transformers/main/ja/_app/immutable/entry/start.1486e459.js"),
import("/docs/transformers/main/ja/_app/immutable/entry/app.d9ae818f.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 118],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
27.5 kB
·
Xet hash:
53aa90a816c50dac3b51449b3df491a76b679041c934234798abe99e7627a01b

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.