Buckets:
| import{s as $n,o as hn,n as W}from"../chunks/scheduler.9bc65507.js";import{S as Rn,i as Bn,g as o,s as a,r,A as _n,h as j,f as e,c as T,j as Qn,u as I,x as C,k as An,y as gn,a as t,v as c,d as u,t as m,w as d,m as bn,n as En}from"../chunks/index.707bf1b6.js";import{T as al}from"../chunks/Tip.c2ecdbf4.js";import{C as g}from"../chunks/CodeBlock.54a9f38d.js";import{H as k,E as Zn}from"../chunks/EditOnGithub.922df6ba.js";import{H as Jl,a as z}from"../chunks/HfOption.6d864328.js";function Nn(A){let s,i='DeepSpeed를 설치하는 데 문제가 있는 경우 <a href="../debugging#deepspeed-cuda-installation">DeepSpeed CUDA 설치</a> 가이드를 확인하세요. DeepSpeed에는 pip 설치 가능한 PyPI 패키지로 설치할 수 있지만, 하드웨어에 가장 잘 맞고 PyPI 배포판에서는 제공되지 않는 1비트 Adam과 같은 특정 기능을 지원하려면 <a href="https://www.deepspeed.ai/tutorials/advanced-install/#install-deepspeed-from-source" rel="nofollow">소스에서 설치하기</a>를 적극 권장합니다.';return{c(){s=o("p"),s.innerHTML=i},l(U){s=j(U,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1rxp0gi"&&(s.innerHTML=i)},m(U,p){t(U,s,p)},p:W,d(U){U&&e(s)}}}function Fn(A){let s,i;return s=new g({props:{code:"cGlwJTIwaW5zdGFsbCUyMGRlZXBzcGVlZA==",highlighted:"pip install deepspeed",wrap:!1}}),{c(){r(s.$$.fragment)},l(U){I(s.$$.fragment,U)},m(U,p){c(s,U,p),i=!0},p:W,i(U){i||(u(s.$$.fragment,U),i=!0)},o(U){m(s.$$.fragment,U),i=!1},d(U){d(s,U)}}}function Sn(A){let s,i;return s=new g({props:{code:"cGlwJTIwaW5zdGFsbCUyMHRyYW5zZm9ybWVycyU1QmRlZXBzcGVlZCU1RA==",highlighted:"pip install transformers[deepspeed]",wrap:!1}}),{c(){r(s.$$.fragment)},l(U){I(s.$$.fragment,U)},m(U,p){c(s,U,p),i=!0},p:W,i(U){i||(u(s.$$.fragment,U),i=!0)},o(U){m(s.$$.fragment,U),i=!1},d(U){d(s,U)}}}function kn(A){let s,i,U,p;return s=new z({props:{id:"install",option:"PyPI",$$slots:{default:[Fn]},$$scope:{ctx:A}}}),U=new z({props:{id:"install",option:"Transformers",$$slots:{default:[Sn]},$$scope:{ctx:A}}}),{c(){r(s.$$.fragment),i=a(),r(U.$$.fragment)},l(n){I(s.$$.fragment,n),i=T(n),I(U.$$.fragment,n)},m(n,J){c(s,n,J),t(n,i,J),c(U,n,J),p=!0},p(n,J){const y={};J&2&&(y.$$scope={dirty:J,ctx:n}),s.$set(y);const Q={};J&2&&(Q.$$scope={dirty:J,ctx:n}),U.$set(Q)},i(n){p||(u(s.$$.fragment,n),u(U.$$.fragment,n),p=!0)},o(n){m(s.$$.fragment,n),m(U.$$.fragment,n),p=!1},d(n){n&&e(i),d(s,n),d(U,n)}}}function On(A){let s,i='DeepSpeed 구성 옵션의 전체 목록은 <a href="https://www.deepspeed.ai/docs/config-json/" rel="nofollow">DeepSpeed Configuration JSON</a>에서 확인할 수 있습니다. 또한 <a href="https://github.com/microsoft/DeepSpeedExamples" rel="nofollow">DeepSpeedExamples</a> 리포지토리 또는 기본 <a href="https://github.com/microsoft/DeepSpeed" rel="nofollow">DeepSpeed</a> 리포지토리에서 다양한 DeepSpeed 구성 예제에 대한 보다 실용적인 예제를 찾을 수 있습니다. 구체적인 예제를 빠르게 찾으려면 다음과 같이 하세요:',U,p,n;return p=new g({props:{code:"Z2l0JTIwY2xvbmUlMjBodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZtaWNyb3NvZnQlMkZEZWVwU3BlZWRFeGFtcGxlcyUwQWNkJTIwRGVlcFNwZWVkRXhhbXBsZXMlMEFmaW5kJTIwLiUyMC1uYW1lJTIwJypqc29uJyUwQSUyMyUyMExhbWIlMjAlRUMlOTglQjUlRUQlOEIlQjAlRUIlQTclODglRUMlOUQlQjQlRUMlQTAlODAlMjAlRUMlODMlOTglRUQlOTQlOEMlMjAlRUMlQjAlQkUlRUElQjglQjAlMEFncmVwJTIwLWklMjBMYW1iJTIwJTI0KGZpbmQlMjAuJTIwLW5hbWUlMjAnKmpzb24nKQ==",highlighted:`git <span class="hljs-built_in">clone</span> https://github.com/microsoft/DeepSpeedExamples | |
| <span class="hljs-built_in">cd</span> DeepSpeedExamples | |
| find . -name <span class="hljs-string">'*json'</span> | |
| <span class="hljs-comment"># Lamb 옵티마이저 샘플 찾기</span> | |
| grep -i Lamb $(find . -name <span class="hljs-string">'*json'</span>)`,wrap:!1}}),{c(){s=o("p"),s.innerHTML=i,U=a(),r(p.$$.fragment)},l(J){s=j(J,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1v5fsnd"&&(s.innerHTML=i),U=T(J),I(p.$$.fragment,J)},m(J,y){t(J,s,y),t(J,U,y),c(p,J,y),n=!0},p:W,i(J){n||(u(p.$$.fragment,J),n=!0)},o(J){m(p.$$.fragment,J),n=!1},d(J){J&&(e(s),e(U)),d(p,J)}}}function Dn(A){let s,i;return s=new g({props:{code:"VHJhaW5pbmdBcmd1bWVudHMoLi4uJTJDJTIwZGVlcHNwZWVkJTNEJTIycGF0aCUyRnRvJTJGZGVlcHNwZWVkX2NvbmZpZy5qc29uJTIyKQ==",highlighted:'TrainingArguments(..., deepspeed=<span class="hljs-string">"path/to/deepspeed_config.json"</span>)',wrap:!1}}),{c(){r(s.$$.fragment)},l(U){I(s.$$.fragment,U)},m(U,p){c(s,U,p),i=!0},p:W,i(U){i||(u(s.$$.fragment,U),i=!0)},o(U){m(s.$$.fragment,U),i=!1},d(U){d(s,U)}}}function Wn(A){let s,i;return s=new g({props:{code:"ZHNfY29uZmlnX2RpY3QlMjAlM0QlMjBkaWN0KHNjaGVkdWxlciUzRHNjaGVkdWxlcl9wYXJhbXMlMkMlMjBvcHRpbWl6ZXIlM0RvcHRpbWl6ZXJfcGFyYW1zKSUwQWFyZ3MlMjAlM0QlMjBUcmFpbmluZ0FyZ3VtZW50cyguLi4lMkMlMjBkZWVwc3BlZWQlM0Rkc19jb25maWdfZGljdCklMEF0cmFpbmVyJTIwJTNEJTIwVHJhaW5lcihtb2RlbCUyQyUyMGFyZ3MlMkMlMjAuLi4p",highlighted:`ds_config_dict = <span class="hljs-built_in">dict</span>(scheduler=scheduler_params, optimizer=optimizer_params) | |
| args = TrainingArguments(..., deepspeed=ds_config_dict) | |
| trainer = Trainer(model, args, ...)`,wrap:!1}}),{c(){r(s.$$.fragment)},l(U){I(s.$$.fragment,U)},m(U,p){c(s,U,p),i=!0},p:W,i(U){i||(u(s.$$.fragment,U),i=!0)},o(U){m(s.$$.fragment,U),i=!1},d(U){d(s,U)}}}function qn(A){let s,i,U,p;return s=new z({props:{id:"pass-config",option:"path to file",$$slots:{default:[Dn]},$$scope:{ctx:A}}}),U=new z({props:{id:"pass-config",option:"nested dict",$$slots:{default:[Wn]},$$scope:{ctx:A}}}),{c(){r(s.$$.fragment),i=a(),r(U.$$.fragment)},l(n){I(s.$$.fragment,n),i=T(n),I(U.$$.fragment,n)},m(n,J){c(s,n,J),t(n,i,J),c(U,n,J),p=!0},p(n,J){const y={};J&2&&(y.$$scope={dirty:J,ctx:n}),s.$set(y);const Q={};J&2&&(Q.$$scope={dirty:J,ctx:n}),U.$set(Q)},i(n){p||(u(s.$$.fragment,n),u(U.$$.fragment,n),p=!0)},o(n){m(s.$$.fragment,n),m(U.$$.fragment,n),p=!1},d(n){n&&e(i),d(s,n),d(U,n)}}}function vn(A){let s;return{c(){s=bn("DeepSpeed는 매개변수 이름의 유효성을 검사하지 않으며 오타가 있으면 매개변수의 기본 설정으로 대체합니다. DeepSpeed 엔진 시작 로그 메시지를 보고 어떤 값을 사용할지 확인할 수 있습니다.")},l(i){s=En(i,"DeepSpeed는 매개변수 이름의 유효성을 검사하지 않으며 오타가 있으면 매개변수의 기본 설정으로 대체합니다. DeepSpeed 엔진 시작 로그 메시지를 보고 어떤 값을 사용할지 확인할 수 있습니다.")},m(i,U){t(i,s,U)},d(i){i&&e(s)}}}function Gn(A){let s,i="ZeRO-1은 옵티마이저 상태를 GPU에 분할하여 약간의 속도 향상을 기대할 수 있습니다. ZeRO-1 구성은 다음과 같이 설정할 수 있습니다:",U,p,n;return p=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyemVyb19vcHRpbWl6YXRpb24lMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFnZSUyMiUzQSUyMDElMEElMjAlMjAlMjAlMjAlN0QlMEElN0Q=",highlighted:`{ | |
| <span class="hljs-attr">"zero_optimization":</span> { | |
| <span class="hljs-attr">"stage":</span> <span class="hljs-number">1</span> | |
| } | |
| }`,wrap:!1}}),{c(){s=o("p"),s.textContent=i,U=a(),r(p.$$.fragment)},l(J){s=j(J,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1orz5x9"&&(s.textContent=i),U=T(J),I(p.$$.fragment,J)},m(J,y){t(J,s,y),t(J,U,y),c(p,J,y),n=!0},p:W,i(J){n||(u(p.$$.fragment,J),n=!0)},o(J){m(p.$$.fragment,J),n=!1},d(J){J&&(e(s),e(U)),d(p,J)}}}function Xn(A){let s,i="ZeRO-2는 GPU에서 옵티마이저와 그레이디언트를 분할합니다. 이 단계는 추론과 관련이 없는 기능이기 때문에 주로 훈련에 사용됩니다. 더 나은 성능을 위해 구성해야 할 몇 가지 중요한 매개변수는 다음과 같습니다:",U,p,n="<li>GPU 메모리 사용량을 줄이려면 <code>offload_optimizer</code>를 활성화해야 합니다.</li> <li><code>true</code>로 설정된 경우 <code>overlap_comm</code>은 GPU 메모리 사용량 증가를 상쇄하여 지연 시간을 줄입니다. 이 기능은 4.5배의 <code>allgather_bucket_size</code> 및 <code>reduce_bucket_size</code>값을 사용합니다. 이 예에서는 <code>5e8</code>로 설정되어 있으므로 9GB의 GPU 메모리가 필요합니다. GPU 메모리가 8GB 이하인 경우, 메모리 요구량을 낮추고 메모리 부족(OOM) 오류를 방지하기 위해 <code>overlap_comm</code>을 줄여야 합니다.</li> <li><code>allgather_bucket_size</code>와 <code>reduce_bucket_size</code>는 사용 가능한 GPU 메모리와 통신 속도를 절충합니다. 값이 작을수록 통신 속도가 느려지고 더 많은 GPU 메모리를 사용할 수 있습니다. 예를 들어, 배치 크기가 큰 것이 약간 느린 훈련 시간보다 더 중요한지 균형을 맞출 수 있습니다.</li> <li>DeepSpeed 0.4.4에서는 CPU 오프로딩을 위해 <code>round_robin_gradients</code>를 사용할 수 있습니다. 이 기능은 세분화된 그레이디언트 파티셔닝을 통해 등급 간 그레이디언트 복사를 CPU 메모리로 병렬화합니다. 성능 이점은 그레이디언트 누적 단계(최적화 단계 간 복사 횟수 증가) 또는 GPU 수(병렬 처리 증가)에 따라 증가합니다.</li>",J,y,Q;return y=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyemVyb19vcHRpbWl6YXRpb24lMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFnZSUyMiUzQSUyMDIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJvZmZsb2FkX29wdGltaXplciUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmRldmljZSUyMiUzQSUyMCUyMmNwdSUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnBpbl9tZW1vcnklMjIlM0ElMjB0cnVlJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyYWxsZ2F0aGVyX3BhcnRpdGlvbnMlMjIlM0ElMjB0cnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyYWxsZ2F0aGVyX2J1Y2tldF9zaXplJTIyJTNBJTIwNWU4JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyb3ZlcmxhcF9jb21tJTIyJTNBJTIwdHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnJlZHVjZV9zY2F0dGVyJTIyJTNBJTIwdHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnJlZHVjZV9idWNrZXRfc2l6ZSUyMiUzQSUyMDVlOCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmNvbnRpZ3VvdXNfZ3JhZGllbnRzJTIyJTNBJTIwdHJ1ZSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnJvdW5kX3JvYmluX2dyYWRpZW50cyUyMiUzQSUyMHRydWUlMEElMjAlMjAlMjAlMjAlN0QlMEElN0Q=",highlighted:`{ | |
| <span class="hljs-attr">"zero_optimization":</span> { | |
| <span class="hljs-attr">"stage":</span> <span class="hljs-number">2</span>, | |
| <span class="hljs-attr">"offload_optimizer":</span> { | |
| <span class="hljs-attr">"device":</span> <span class="hljs-string">"cpu"</span>, | |
| <span class="hljs-attr">"pin_memory":</span> <span class="hljs-literal">true</span> | |
| }, | |
| <span class="hljs-attr">"allgather_partitions":</span> <span class="hljs-literal">true</span>, | |
| <span class="hljs-attr">"allgather_bucket_size":</span> <span class="hljs-number">5e8</span>, | |
| <span class="hljs-attr">"overlap_comm":</span> <span class="hljs-literal">true</span>, | |
| <span class="hljs-attr">"reduce_scatter":</span> <span class="hljs-literal">true</span>, | |
| <span class="hljs-attr">"reduce_bucket_size":</span> <span class="hljs-number">5e8</span>, | |
| <span class="hljs-attr">"contiguous_gradients":</span> <span class="hljs-literal">true</span> | |
| <span class="hljs-attr">"round_robin_gradients":</span> <span class="hljs-literal">true</span> | |
| } | |
| }`,wrap:!1}}),{c(){s=o("p"),s.textContent=i,U=a(),p=o("ul"),p.innerHTML=n,J=a(),r(y.$$.fragment)},l(f){s=j(f,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1addg8j"&&(s.textContent=i),U=T(f),p=j(f,"UL",{"data-svelte-h":!0}),C(p)!=="svelte-103xxuy"&&(p.innerHTML=n),J=T(f),I(y.$$.fragment,f)},m(f,R){t(f,s,R),t(f,U,R),t(f,p,R),t(f,J,R),c(y,f,R),Q=!0},p:W,i(f){Q||(u(y.$$.fragment,f),Q=!0)},o(f){m(y.$$.fragment,f),Q=!1},d(f){f&&(e(s),e(U),e(p),e(J)),d(y,f)}}}function zn(A){let s,i='ZeRO-3로 대규모 모델을 초기화하고 매개변수에 액세스하는 방법에 대한 자세한 내용은 <a href="https://deepspeed.readthedocs.io/en/latest/zero3.html#constructing-massive-models" rel="nofollow">Constructing Massive Models</a> 및 <a href="https://deepspeed.readthedocs.io/en/latest/zero3.html#gathering-parameters" rel="nofollow">Gathering Parameters</a> 가이드를 참조하세요.';return{c(){s=o("p"),s.innerHTML=i},l(U){s=j(U,"P",{"data-svelte-h":!0}),C(s)!=="svelte-15pqmva"&&(s.innerHTML=i)},m(U,p){t(U,s,p)},p:W,d(U){U&&e(s)}}}function Hn(A){let s,i="ZeRO-3는 옵티마이저, 그래디언트, 매개변수를 여러 GPU에 걸쳐 분할합니다. ZeRO-2와 달리 ZeRO-3는 여러 GPU에 대규모 모델을 가져올 수 있기 때문에 훈련 외에도 추론에도 사용할 수 있습니다. 구성해야 할 몇 가지 중요한 매개변수는 다음과 같습니다:",U,p,n="<li><p><code>device: "cpu"</code> 는 GPU 메모리가 부족하고 사용 가능한 CPU 메모리가 있는 경우 도움이 될 수 있습니다. 이를 통해 모델 매개변수를 CPU로 오프로드할 수 있습니다.</p></li> <li><p><code>pin_memory: true</code> 는 처리량을 향상시킬 수 있지만, 핀 메모리는 메모리를 요청한 특정 프로세스를 위해 예약되어 있고 일반적으로 일반 CPU 메모리보다 훨씬 빠르게 액세스되기 때문에 다른 프로세스에서 사용할 수 있는 메모리가 줄어듭니다.</p></li> <li><p><code>stage3_max_live_parameters</code> 는 특정 시간에 GPU에 유지하려는 전체 매개변수의 상한값입니다. OOM 오류가 발생하면 이 값을 줄이세요.</p></li> <li><p><code>stage3_max_reuse_distance</code> 는 향후 매개변수를 다시 사용할 시기를 결정하는 값으로, 매개변수를 버릴지 유지할지 결정하는 데 도움이 됩니다. 매개변수를 재사용할 경우(<code>stage3_max_reuse_distance</code>보다 작은 값인 경우) 통신 오버헤드를 줄이기 위해 매개변수를 유지합니다. 이 기능은 활성화 체크포인팅이 활성화되어 있고 역전파 계산시까지 순전파 시점의 매개변수를 유지하려는 경우에 매우 유용합니다. 그러나 OOM 오류가 발생하면 이 값을 줄이세요.</p></li> <li><p>모델 저장 시 <code>stage3_gather_16bit_weights_on_model_save</code>는 fp16 가중치를 통합합니다. 대규모 모델을 학습하거나 여러 GPU를 사용할 경우 메모리와 속도 측면에서 비용이 많이 듭니다. 훈련을 재개할 계획이라면 이 옵션을 활성화해야 합니다.</p></li> <li><p><code>sub_group_size</code> 는 최적화 단계에서 업데이트되는 매개변수를 제어합니다. 매개변수는 <code>sub_group_size</code>의 버킷으로 그룹화되며 각 버킷은 한 번에 하나씩 업데이트됩니다. NVMe 오프로드와 함께 사용하는 경우 <code>sub_group_size</code>는 최적화 단계 중 모델 상태가 CPU 메모리로 이동하는 시점을 결정합니다. 이렇게 하면 매우 큰 모델의 CPU 메모리 부족을 방지할 수 있습니다. NVMe 오프로드를 사용하지 않는 경우 <code>sub_group_size</code>를 기본값으로 둘 수 있지만, 사용하는 경우 변경하는 것이 좋습니다:</p> <ol><li>옵티마이저 단계에서 OOM 오류가 발생합니다. 이 경우, 임시 버퍼의 메모리 사용량을 줄이려면 <code>sub_group_size</code>를 줄이세요.</li> <li>옵티마이저 단계에서 시간이 너무 오래 걸립니다. 이 경우 데이터 버퍼 증가로 인한 대역폭 사용률을 개선하기 위해 <code>sub_group_size</code>를 늘리세요.</li></ol></li> <li><p><code>reduce_bucket_size</code>, <code>stage3_prefetch_bucket_size</code>, <code>stage3_param_persistence_threshold</code>는 모델의 숨겨진 크기에 따라 달라집니다. 이 값들을 <code>auto</code>으로 설정하고 <code>Trainer</code>가 자동으로 값을 할당하도록 허용하는 것이 좋습니다.</p></li>",J,y,Q,f,R='<a href="https://deepspeed.readthedocs.io/en/latest/zero3.html#deepspeed.zero.Init" rel="nofollow"><code>deepspeed.zero.Init</code></a> 컨텍스트 매니저를 사용하면 모델을 더 빠르게 초기화할 수 있습니다:',E,b,Z,w,B="사전 학습된 모델의 경우, 딥스피드 구성 파일에 <code>is_deepspeed_zero3_enabled: true</code>가 <code>TrainingArguments</code>에 설정되어 있어야 하며, ZeRO 구성이 활성화되어 있어야 합니다. 훈련된 모델 <code>from_pretrained()</code>을 호출하기 <strong>전에</strong> <code>TrainingArguments</code> 객체를 생성해야 합니다.",S,O,_,h,D="fp16 가중치가 단일 GPU에 맞지 않는 경우 ZeRO-3이 필요합니다. fp16 가중치를 로드할 수 있는 경우, <code>from_pretrained()</code>에 <code>torch_dtype=torch.float16</code>을 지정해야 합니다.",G,Y,L="ZeRO-3의 또 다른 고려 사항은 여러 개의 GPU를 사용하는 경우 현재 실행 중인 레이어의 매개변수가 아닌 한 단일 GPU에 모든 매개변수가 없다는 것입니다. 사전 훈련된 모델 가중치를 <code>from_pretrained()</code>에 로드하는 등 모든 레이어의 모든 매개변수에 한 번에 액세스하려면 한 번에 하나의 레이어를 로드하고 즉시 모든 GPU에 파티셔닝합니다. 이는 매우 큰 모델의 경우 메모리 제한으로 인해 하나의 GPU에 가중치를 로드한 다음 다른 GPU에 분산할 수 없기 때문입니다.",H,X,K="다음과 같이 보이는 모델 매개변수 가중치(여기서 <code>tensor([1.])</code>) 또는 매개변수 크기가 더 큰 다차원 형태 대신 1인 경우, 이는 매개변수가 분할되어 있으며 이것이 ZeRO-3 플레이스홀더인 것을 의미합니다.",ll,q,el,v,P;return y=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyemVyb19vcHRpbWl6YXRpb24lMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFnZSUyMiUzQSUyMDMlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJvZmZsb2FkX29wdGltaXplciUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmRldmljZSUyMiUzQSUyMCUyMmNwdSUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnBpbl9tZW1vcnklMjIlM0ElMjB0cnVlJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyb2ZmbG9hZF9wYXJhbSUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmRldmljZSUyMiUzQSUyMCUyMmNwdSUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnBpbl9tZW1vcnklMjIlM0ElMjB0cnVlJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyb3ZlcmxhcF9jb21tJTIyJTNBJTIwdHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmNvbnRpZ3VvdXNfZ3JhZGllbnRzJTIyJTNBJTIwdHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnN1Yl9ncm91cF9zaXplJTIyJTNBJTIwMWU5JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycmVkdWNlX2J1Y2tldF9zaXplJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnN0YWdlM19wcmVmZXRjaF9idWNrZXRfc2l6ZSUyMiUzQSUyMCUyMmF1dG8lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFnZTNfcGFyYW1fcGVyc2lzdGVuY2VfdGhyZXNob2xkJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnN0YWdlM19tYXhfbGl2ZV9wYXJhbWV0ZXJzJTIyJTNBJTIwMWU5JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyc3RhZ2UzX21heF9yZXVzZV9kaXN0YW5jZSUyMiUzQSUyMDFlOSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnN0YWdlM19nYXRoZXJfMTZiaXRfd2VpZ2h0c19vbl9tb2RlbF9zYXZlJTIyJTNBJTIwdHJ1ZSUwQSUyMCUyMCUyMCUyMCU3RCUwQSU3RA==",highlighted:`{ | |
| <span class="hljs-attr">"zero_optimization":</span> { | |
| <span class="hljs-attr">"stage":</span> <span class="hljs-number">3</span>, | |
| <span class="hljs-attr">"offload_optimizer":</span> { | |
| <span class="hljs-attr">"device":</span> <span class="hljs-string">"cpu"</span>, | |
| <span class="hljs-attr">"pin_memory":</span> <span class="hljs-literal">true</span> | |
| }, | |
| <span class="hljs-attr">"offload_param":</span> { | |
| <span class="hljs-attr">"device":</span> <span class="hljs-string">"cpu"</span>, | |
| <span class="hljs-attr">"pin_memory":</span> <span class="hljs-literal">true</span> | |
| }, | |
| <span class="hljs-attr">"overlap_comm":</span> <span class="hljs-literal">true</span>, | |
| <span class="hljs-attr">"contiguous_gradients":</span> <span class="hljs-literal">true</span>, | |
| <span class="hljs-attr">"sub_group_size":</span> <span class="hljs-number">1e9</span>, | |
| <span class="hljs-attr">"reduce_bucket_size":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"stage3_prefetch_bucket_size":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"stage3_param_persistence_threshold":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"stage3_max_live_parameters":</span> <span class="hljs-number">1e9</span>, | |
| <span class="hljs-attr">"stage3_max_reuse_distance":</span> <span class="hljs-number">1e9</span>, | |
| <span class="hljs-attr">"stage3_gather_16bit_weights_on_model_save":</span> <span class="hljs-literal">true</span> | |
| } | |
| }`,wrap:!1}}),b=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFQ1Rm9yQ29uZGl0aW9uYWxHZW5lcmF0aW9uJTJDJTIwVDVDb25maWclMEFpbXBvcnQlMjBkZWVwc3BlZWQlMEElMEF3aXRoJTIwZGVlcHNwZWVkLnplcm8uSW5pdCgpJTNBJTBBJTIwJTIwJTIwJTIwY29uZmlnJTIwJTNEJTIwVDVDb25maWcuZnJvbV9wcmV0cmFpbmVkKCUyMmdvb2dsZS10NSUyRnQ1LXNtYWxsJTIyKSUwQSUyMCUyMCUyMCUyMG1vZGVsJTIwJTNEJTIwVDVGb3JDb25kaXRpb25hbEdlbmVyYXRpb24oY29uZmlnKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> T5ForConditionalGeneration, T5Config | |
| <span class="hljs-keyword">import</span> deepspeed | |
| <span class="hljs-keyword">with</span> deepspeed.zero.Init(): | |
| config = T5Config.from_pretrained(<span class="hljs-string">"google-t5/t5-small"</span>) | |
| model = T5ForConditionalGeneration(config)`,wrap:!1}}),O=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbCUyQyUyMFRyYWluZXIlMkMlMjBUcmFpbmluZ0FyZ3VtZW50cyUwQSUwQXRyYWluaW5nX2FyZ3MlMjAlM0QlMjBUcmFpbmluZ0FyZ3VtZW50cyguLi4lMkMlMjBkZWVwc3BlZWQlM0Rkc19jb25maWcpJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmdvb2dsZS10NSUyRnQ1LXNtYWxsJTIyKSUwQXRyYWluZXIlMjAlM0QlMjBUcmFpbmVyKG1vZGVsJTNEbW9kZWwlMkMlMjBhcmdzJTNEdHJhaW5pbmdfYXJncyUyQyUyMC4uLik=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModel, Trainer, TrainingArguments | |
| training_args = TrainingArguments(..., deepspeed=ds_config) | |
| model = AutoModel.from_pretrained(<span class="hljs-string">"google-t5/t5-small"</span>) | |
| trainer = Trainer(model=model, args=training_args, ...)`,wrap:!1}}),q=new g({props:{code:"dGVuc29yKCU1QjEuMCU1RCUyQyUyMGRldmljZSUzRCUyMmN1ZGElM0EwJTIyJTJDJTIwZHR5cGUlM0R0b3JjaC5mbG9hdDE2JTJDJTIwcmVxdWlyZXNfZ3JhZCUzRFRydWUp",highlighted:'tensor([<span class="hljs-number">1.0</span>], device=<span class="hljs-string">"cuda:0"</span>, dtype=torch.float16, requires_grad=<span class="hljs-literal">True</span>)',wrap:!1}}),v=new al({props:{$$slots:{default:[zn]},$$scope:{ctx:A}}}),{c(){s=o("p"),s.textContent=i,U=a(),p=o("ul"),p.innerHTML=n,J=a(),r(y.$$.fragment),Q=a(),f=o("p"),f.innerHTML=R,E=a(),r(b.$$.fragment),Z=a(),w=o("p"),w.innerHTML=B,S=a(),r(O.$$.fragment),_=a(),h=o("p"),h.innerHTML=D,G=a(),Y=o("p"),Y.innerHTML=L,H=a(),X=o("p"),X.innerHTML=K,ll=a(),r(q.$$.fragment),el=a(),r(v.$$.fragment)},l($){s=j($,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1yt4zfw"&&(s.textContent=i),U=T($),p=j($,"UL",{"data-svelte-h":!0}),C(p)!=="svelte-xw7hac"&&(p.innerHTML=n),J=T($),I(y.$$.fragment,$),Q=T($),f=j($,"P",{"data-svelte-h":!0}),C(f)!=="svelte-nlh1t5"&&(f.innerHTML=R),E=T($),I(b.$$.fragment,$),Z=T($),w=j($,"P",{"data-svelte-h":!0}),C(w)!=="svelte-1oo2t7g"&&(w.innerHTML=B),S=T($),I(O.$$.fragment,$),_=T($),h=j($,"P",{"data-svelte-h":!0}),C(h)!=="svelte-gxyc8m"&&(h.innerHTML=D),G=T($),Y=j($,"P",{"data-svelte-h":!0}),C(Y)!=="svelte-16ar32h"&&(Y.innerHTML=L),H=T($),X=j($,"P",{"data-svelte-h":!0}),C(X)!=="svelte-1jl9ddl"&&(X.innerHTML=K),ll=T($),I(q.$$.fragment,$),el=T($),I(v.$$.fragment,$)},m($,F){t($,s,F),t($,U,F),t($,p,F),t($,J,F),c(y,$,F),t($,Q,F),t($,f,F),t($,E,F),c(b,$,F),t($,Z,F),t($,w,F),t($,S,F),c(O,$,F),t($,_,F),t($,h,F),t($,G,F),t($,Y,F),t($,H,F),t($,X,F),t($,ll,F),c(q,$,F),t($,el,F),c(v,$,F),P=!0},p($,F){const sl={};F&2&&(sl.$$scope={dirty:F,ctx:$}),v.$set(sl)},i($){P||(u(y.$$.fragment,$),u(b.$$.fragment,$),u(O.$$.fragment,$),u(q.$$.fragment,$),u(v.$$.fragment,$),P=!0)},o($){m(y.$$.fragment,$),m(b.$$.fragment,$),m(O.$$.fragment,$),m(q.$$.fragment,$),m(v.$$.fragment,$),P=!1},d($){$&&(e(s),e(U),e(p),e(J),e(Q),e(f),e(E),e(Z),e(w),e(S),e(_),e(h),e(G),e(Y),e(H),e(X),e(ll),e(el)),d(y,$),d(b,$),d(O,$),d(q,$),d(v,$)}}}function xn(A){let s,i,U,p,n,J;return s=new z({props:{id:"zero-config",option:"ZeRO-1",$$slots:{default:[Gn]},$$scope:{ctx:A}}}),U=new z({props:{id:"zero-config",option:"ZeRO-2",$$slots:{default:[Xn]},$$scope:{ctx:A}}}),n=new z({props:{id:"zero-config",option:"ZeRO-3",$$slots:{default:[Hn]},$$scope:{ctx:A}}}),{c(){r(s.$$.fragment),i=a(),r(U.$$.fragment),p=a(),r(n.$$.fragment)},l(y){I(s.$$.fragment,y),i=T(y),I(U.$$.fragment,y),p=T(y),I(n.$$.fragment,y)},m(y,Q){c(s,y,Q),t(y,i,Q),c(U,y,Q),t(y,p,Q),c(n,y,Q),J=!0},p(y,Q){const f={};Q&2&&(f.$$scope={dirty:Q,ctx:y}),s.$set(f);const R={};Q&2&&(R.$$scope={dirty:Q,ctx:y}),U.$set(R);const E={};Q&2&&(E.$$scope={dirty:Q,ctx:y}),n.$set(E)},i(y){J||(u(s.$$.fragment,y),u(U.$$.fragment,y),u(n.$$.fragment,y),J=!0)},o(y){m(s.$$.fragment,y),m(U.$$.fragment,y),m(n.$$.fragment,y),J=!1},d(y){y&&(e(i),e(p)),d(s,y),d(U,y),d(n,y)}}}function Yn(A){let s,i="구성 파일의 최적화 프로그램 및 스케줄러 매개변수는 명령줄에서 설정할 수 있으므로 오류를 찾기 어렵지 않습니다. 예를 들어 학습 속도가 다른 곳에서 다른 값으로 설정된 경우 명령줄에서 이를 재정의할 수 있습니다. 최적화 프로그램 및 스케줄러 매개변수 외에도 <code>Trainer</code> 명령줄 인수가 DeepSpeed 구성과 일치하는지 확인해야 합니다.";return{c(){s=o("p"),s.innerHTML=i},l(U){s=j(U,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1foqjf4"&&(s.innerHTML=i)},m(U,p){t(U,s,p)},p:W,d(U){U&&e(s)}}}function Ln(A){let s,i='DeepSpeed는 여러 <a href="https://www.deepspeed.ai/docs/config-json/#optimizer-parameters" rel="nofollow">옵티마이저</a>를 제공하지만(Adam, AdamW, OneBitAdam 및 LAMB) PyTorch에서 다른 옵티마이저를 가져올 수도 있습니다. 설정에서 옵티마이저를 구성하지 않으면 <code>Trainer</code>가 자동으로 AdamW를 선택하고 명령줄에서 제공된 값 또는 기본값을 사용합니다: <code>lr</code>, <code>adam_beta1</code>, <code>adam_beta2</code>, <code>adam_epsilon</code>, <code>weight_decay</code>.',U,p,n="매개변수를 <code>"auto"</code>으로 설정하거나 원하는 값을 직접 수동으로 입력할 수 있습니다.",J,y,Q,f,R="최상위 구성에 다음을 추가하여 지원되지 않는 옵티마이저를 사용할 수도 있습니다.",E,b,Z,w,B="DeepSpeed==0.8.3부터 오프로드를 사용하려면 오프로드가 DeepSpeed의 CPU Adam 옵티마이저에서 가장 잘 작동하므로 최상위 수준 구성에 다음 사항을 추가해야 합니다.",S,O,_;return y=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIyb3B0aW1pemVyJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydHlwZSUyMiUzQSUyMCUyMkFkYW1XJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycGFyYW1zJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybHIlMjIlM0ElMjAlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyYmV0YXMlMjIlM0ElMjAlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZXBzJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMndlaWdodF9kZWNheSUyMiUzQSUyMCUyMmF1dG8lMjIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMEElMjAlMjAlMjAlN0QlMEElN0Q=",highlighted:`{ | |
| <span class="hljs-attr">"optimizer":</span> { | |
| <span class="hljs-attr">"type":</span> <span class="hljs-string">"AdamW"</span>, | |
| <span class="hljs-attr">"params":</span> { | |
| <span class="hljs-attr">"lr":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"betas":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"eps":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"weight_decay":</span> <span class="hljs-string">"auto"</span> | |
| } | |
| } | |
| }`,wrap:!1}}),b=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIyemVyb19hbGxvd191bnRlc3RlZF9vcHRpbWl6ZXIlMjIlM0ElMjB0cnVlJTBBJTdE",highlighted:`{ | |
| <span class="hljs-attr">"zero_allow_untested_optimizer":</span> <span class="hljs-literal">true</span> | |
| }`,wrap:!1}}),O=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIyemVyb19mb3JjZV9kc19jcHVfb3B0aW1pemVyJTIyJTNBJTIwZmFsc2UlMEElN0Q=",highlighted:`{ | |
| <span class="hljs-attr">"zero_force_ds_cpu_optimizer":</span> <span class="hljs-literal">false</span> | |
| }`,wrap:!1}}),{c(){s=o("p"),s.innerHTML=i,U=a(),p=o("p"),p.innerHTML=n,J=a(),r(y.$$.fragment),Q=a(),f=o("p"),f.textContent=R,E=a(),r(b.$$.fragment),Z=a(),w=o("p"),w.textContent=B,S=a(),r(O.$$.fragment)},l(h){s=j(h,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1fl66hc"&&(s.innerHTML=i),U=T(h),p=j(h,"P",{"data-svelte-h":!0}),C(p)!=="svelte-10dgirt"&&(p.innerHTML=n),J=T(h),I(y.$$.fragment,h),Q=T(h),f=j(h,"P",{"data-svelte-h":!0}),C(f)!=="svelte-1jv0ylj"&&(f.textContent=R),E=T(h),I(b.$$.fragment,h),Z=T(h),w=j(h,"P",{"data-svelte-h":!0}),C(w)!=="svelte-1oc48yb"&&(w.textContent=B),S=T(h),I(O.$$.fragment,h)},m(h,D){t(h,s,D),t(h,U,D),t(h,p,D),t(h,J,D),c(y,h,D),t(h,Q,D),t(h,f,D),t(h,E,D),c(b,h,D),t(h,Z,D),t(h,w,D),t(h,S,D),c(O,h,D),_=!0},p:W,i(h){_||(u(y.$$.fragment,h),u(b.$$.fragment,h),u(O.$$.fragment,h),_=!0)},o(h){m(y.$$.fragment,h),m(b.$$.fragment,h),m(O.$$.fragment,h),_=!1},d(h){h&&(e(s),e(U),e(p),e(J),e(Q),e(f),e(E),e(Z),e(w),e(S)),d(y,h),d(b,h),d(O,h)}}}function Pn(A){let s,i='DeepSpeed는 LRRangeTest, OneCycle, WarmupLR 및 WarmupDecayLR learning rate<a href="https://www.deepspeed.ai/docs/config-json/#scheduler-parameters" rel="nofollow">schedulers</a>를 지원합니다.',U,p,n="트랜스포머와 DeepSpeed는 동일한 두 가지 스케줄러를 제공합니다:",J,y,Q="<li>WarmupLR은 Transformers의 <code>--lr_scheduler_type constant_warmup</code>과 동일합니다.</li> <li>WarmupDecayLR은 Transformers의 <code>--lr_scheduler_type linear</code>와 동일합니다(Transformers에서 사용되는 기본 스케줄러입니다).</li>",f,R,E="설정에서 스케줄러를 구성하지 않으면<code>Trainer</code>는 자동으로 WarmupDecayLR을 선택하고 명령줄에서 제공된 값 또는 기본값을 사용합니다: <code>warmup_min_lr</code>, <code>warmup_max_lr</code>, <code>warmup_num_steps</code>, <code>total_num_steps</code> (<code>max_steps</code>가 제공되지 않으면 런타임 중에 자동으로 계산됨).",b,Z,w="매개변수를 <code>"auto"</code>으로 설정하거나 원하는 값을 직접 수동으로 입력할 수 있습니다.",B,S,O;return S=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIyc2NoZWR1bGVyJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydHlwZSUyMiUzQSUyMCUyMldhcm11cERlY2F5TFIlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJwYXJhbXMlMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ0b3RhbF9udW1fc3RlcHMlMjIlM0ElMjAlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyd2FybXVwX21pbl9sciUyMiUzQSUyMCUyMmF1dG8lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ3YXJtdXBfbWF4X2xyJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMndhcm11cF9udW1fc3RlcHMlMjIlM0ElMjAlMjJhdXRvJTIyJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTBBJTIwJTIwJTIwJTIwJTIwJTdEJTBBJTdE",highlighted:`{ | |
| <span class="hljs-attr">"scheduler":</span> { | |
| <span class="hljs-attr">"type":</span> <span class="hljs-string">"WarmupDecayLR"</span>, | |
| <span class="hljs-attr">"params":</span> { | |
| <span class="hljs-attr">"total_num_steps":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"warmup_min_lr":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"warmup_max_lr":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"warmup_num_steps":</span> <span class="hljs-string">"auto"</span> | |
| } | |
| } | |
| }`,wrap:!1}}),{c(){s=o("p"),s.innerHTML=i,U=a(),p=o("p"),p.textContent=n,J=a(),y=o("ul"),y.innerHTML=Q,f=a(),R=o("p"),R.innerHTML=E,b=a(),Z=o("p"),Z.innerHTML=w,B=a(),r(S.$$.fragment)},l(_){s=j(_,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1esiel4"&&(s.innerHTML=i),U=T(_),p=j(_,"P",{"data-svelte-h":!0}),C(p)!=="svelte-1ko1c4g"&&(p.textContent=n),J=T(_),y=j(_,"UL",{"data-svelte-h":!0}),C(y)!=="svelte-1vzqyob"&&(y.innerHTML=Q),f=T(_),R=j(_,"P",{"data-svelte-h":!0}),C(R)!=="svelte-1jk21rk"&&(R.innerHTML=E),b=T(_),Z=j(_,"P",{"data-svelte-h":!0}),C(Z)!=="svelte-10dgirt"&&(Z.innerHTML=w),B=T(_),I(S.$$.fragment,_)},m(_,h){t(_,s,h),t(_,U,h),t(_,p,h),t(_,J,h),t(_,y,h),t(_,f,h),t(_,R,h),t(_,b,h),t(_,Z,h),t(_,B,h),c(S,_,h),O=!0},p:W,i(_){O||(u(S.$$.fragment,_),O=!0)},o(_){m(S.$$.fragment,_),O=!1},d(_){_&&(e(s),e(U),e(p),e(J),e(y),e(f),e(R),e(b),e(Z),e(B)),d(S,_)}}}function Kn(A){let s,i,U,p;return s=new z({props:{id:"opt-sched",option:"optimizer",$$slots:{default:[Ln]},$$scope:{ctx:A}}}),U=new z({props:{id:"opt-sched",option:"scheduler",$$slots:{default:[Pn]},$$scope:{ctx:A}}}),{c(){r(s.$$.fragment),i=a(),r(U.$$.fragment)},l(n){I(s.$$.fragment,n),i=T(n),I(U.$$.fragment,n)},m(n,J){c(s,n,J),t(n,i,J),c(U,n,J),p=!0},p(n,J){const y={};J&2&&(y.$$scope={dirty:J,ctx:n}),s.$set(y);const Q={};J&2&&(Q.$$scope={dirty:J,ctx:n}),U.$set(Q)},i(n){p||(u(s.$$.fragment,n),u(U.$$.fragment,n),p=!0)},o(n){m(s.$$.fragment,n),m(U.$$.fragment,n),p=!1},d(n){n&&e(i),d(s,n),d(U,n)}}}function lJ(A){let s,i="모델이 혼합 정밀도로 사전 학습되지 않은 경우와 같이 혼합 정밀도로 잘 작동하지 않는 경우 NaN 손실을 유발할 수 있는 오버플로 또는 언더플로 문제가 발생할 수 있습니다. 이러한 경우에는 기본 fp16 모드를 명시적으로 비활성화하여 전체 fp32 정밀도를 사용해야 합니다.",U,p,n,J,y='Ampere GPU 및 PyTorch 1.7 이상의 경우 일부 연산에 대해 더 효율적인 <a href="https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices" rel="nofollow">tf32</a> 형식으로 자동 전환되지만 결과는 여전히 fp32로 표시됩니다. <code>Trainer</code>에서 <code>--tf32</code>를 설정하여 활성화하고 <code>--tf32 0</code> 또는 <code>--no_tf32</code>를 비활성화하면 제어할 수 있습니다.',Q;return p=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyZnAxNiUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmVuYWJsZWQlMjIlM0ElMjBmYWxzZSUwQSUyMCUyMCUyMCUyMCU3RCUwQSU3RA==",highlighted:`{ | |
| <span class="hljs-attr">"fp16":</span> { | |
| <span class="hljs-attr">"enabled":</span> <span class="hljs-literal">false</span> | |
| } | |
| }`,wrap:!1}}),{c(){s=o("p"),s.textContent=i,U=a(),r(p.$$.fragment),n=a(),J=o("p"),J.innerHTML=y},l(f){s=j(f,"P",{"data-svelte-h":!0}),C(s)!=="svelte-vb21iv"&&(s.textContent=i),U=T(f),I(p.$$.fragment,f),n=T(f),J=j(f,"P",{"data-svelte-h":!0}),C(J)!=="svelte-ii7v2c"&&(J.innerHTML=y)},m(f,R){t(f,s,R),t(f,U,R),c(p,f,R),t(f,n,R),t(f,J,R),Q=!0},p:W,i(f){Q||(u(p.$$.fragment,f),Q=!0)},o(f){m(p.$$.fragment,f),Q=!1},d(f){f&&(e(s),e(U),e(n),e(J)),d(p,f)}}}function eJ(A){let s,i="PyTorch AMP와 같은 fp16 혼합 정밀도를 구성하면 메모리 사용량이 줄어들고 훈련 속도가 빨라집니다.<code>Trainer</code>는 <code>args.fp16_backend</code> 값에 따라 fp16을 자동으로 활성화 또는 비활성화하며, 나머지 구성은 사용자가 설정할 수 있습니다. 명령줄에서 다음 인수를 전달하면 fp16이 활성화됩니다: <code>fp16</code>, <code>--fp16_backend amp</code> 또는 <code>--fp16_full_eval</code>.",U,p,n,J,y='추가 딥스피드 fp16 훈련 옵션은 <a href="https://www.deepspeed.ai/docs/config-json/#fp16-training-options" rel="nofollow">fp16 훈련 옵션</a> 참조를 참조하세요.',Q,f,R="Apex와 같은 fp16 혼합 정밀도를 구성하려면 아래 그림과 같이 <code>"auto"</code> 또는 직접 값을 설정합니다.<code>Trainer</code>는 <code>args.fp16_backend</code> 및 <code>args.fp16_opt_level</code>의 값에 따라 <code>amp</code>를 자동으로 구성합니다. 다음 인수를 전달하면 명령줄에서 활성화할 수도 있습니다: <code>fp16</code>, <code>--fp16_backend apex</code> 또는 <code>--fp16_opt_level 01</code>.",E,b,Z;return p=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyZnAxNiUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmVuYWJsZWQlMjIlM0ElMjAlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybG9zc19zY2FsZSUyMiUzQSUyMDAlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJsb3NzX3NjYWxlX3dpbmRvdyUyMiUzQSUyMDEwMDAlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJpbml0aWFsX3NjYWxlX3Bvd2VyJTIyJTNBJTIwMTYlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJoeXN0ZXJlc2lzJTIyJTNBJTIwMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm1pbl9sb3NzX3NjYWxlJTIyJTNBJTIwMSUwQSUyMCUyMCUyMCUyMCU3RCUwQSU3RA==",highlighted:`{ | |
| <span class="hljs-attr">"fp16":</span> { | |
| <span class="hljs-attr">"enabled":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"loss_scale":</span> <span class="hljs-number">0</span>, | |
| <span class="hljs-attr">"loss_scale_window":</span> <span class="hljs-number">1000</span>, | |
| <span class="hljs-attr">"initial_scale_power":</span> <span class="hljs-number">16</span>, | |
| <span class="hljs-attr">"hysteresis":</span> <span class="hljs-number">2</span>, | |
| <span class="hljs-attr">"min_loss_scale":</span> <span class="hljs-number">1</span> | |
| } | |
| }`,wrap:!1}}),b=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyYW1wJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZW5hYmxlZCUyMiUzQSUyMCUyMmF1dG8lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJvcHRfbGV2ZWwlMjIlM0ElMjAlMjJhdXRvJTIyJTBBJTIwJTIwJTIwJTIwJTdEJTBBJTdE",highlighted:`{ | |
| <span class="hljs-attr">"amp":</span> { | |
| <span class="hljs-attr">"enabled":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"opt_level":</span> <span class="hljs-string">"auto"</span> | |
| } | |
| }`,wrap:!1}}),{c(){s=o("p"),s.innerHTML=i,U=a(),r(p.$$.fragment),n=a(),J=o("p"),J.innerHTML=y,Q=a(),f=o("p"),f.innerHTML=R,E=a(),r(b.$$.fragment)},l(w){s=j(w,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1xoi7lx"&&(s.innerHTML=i),U=T(w),I(p.$$.fragment,w),n=T(w),J=j(w,"P",{"data-svelte-h":!0}),C(J)!=="svelte-xmsj8e"&&(J.innerHTML=y),Q=T(w),f=j(w,"P",{"data-svelte-h":!0}),C(f)!=="svelte-a6l58o"&&(f.innerHTML=R),E=T(w),I(b.$$.fragment,w)},m(w,B){t(w,s,B),t(w,U,B),c(p,w,B),t(w,n,B),t(w,J,B),t(w,Q,B),t(w,f,B),t(w,E,B),c(b,w,B),Z=!0},p:W,i(w){Z||(u(p.$$.fragment,w),u(b.$$.fragment,w),Z=!0)},o(w){m(p.$$.fragment,w),m(b.$$.fragment,w),Z=!1},d(w){w&&(e(s),e(U),e(n),e(J),e(Q),e(f),e(E)),d(p,w),d(b,w)}}}function tJ(A){let s,i='bf16을 사용하려면 DeepSpeed==0.6.0 이상이 필요합니다. bf16은 fp32와 동적 범위가 동일하며 손실 스케일링이 필요하지 않습니다. 그러나 <a href="#gradient-accumulation">gradient accumulation</a>을 bf16과 함께 사용하면 이 형식의 낮은 정밀도로 인해 손실이 발생할 수 있으므로 원하지 않는 그레이디언트가 bf16에 누적될 수 있습니다.',U,p,n="bf16은 설정 파일에서 설정하거나 다음 인수를 전달하면 명령줄에서 활성화할 수 있습니다: <code>--bf16</code> 또는 <code>--bf16_full_eval</code>.",J,y,Q;return y=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyYmYxNiUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmVuYWJsZWQlMjIlM0ElMjAlMjJhdXRvJTIyJTBBJTIwJTIwJTIwJTIwJTdEJTBBJTdE",highlighted:`{ | |
| <span class="hljs-attr">"bf16":</span> { | |
| <span class="hljs-attr">"enabled":</span> <span class="hljs-string">"auto"</span> | |
| } | |
| }`,wrap:!1}}),{c(){s=o("p"),s.innerHTML=i,U=a(),p=o("p"),p.innerHTML=n,J=a(),r(y.$$.fragment)},l(f){s=j(f,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1t47zts"&&(s.innerHTML=i),U=T(f),p=j(f,"P",{"data-svelte-h":!0}),C(p)!=="svelte-1r1r5k"&&(p.innerHTML=n),J=T(f),I(y.$$.fragment,f)},m(f,R){t(f,s,R),t(f,U,R),t(f,p,R),t(f,J,R),c(y,f,R),Q=!0},p:W,i(f){Q||(u(y.$$.fragment,f),Q=!0)},o(f){m(y.$$.fragment,f),Q=!1},d(f){f&&(e(s),e(U),e(p),e(J)),d(y,f)}}}function sJ(A){let s,i,U,p,n,J;return s=new z({props:{id:"precision",option:"fp32",$$slots:{default:[lJ]},$$scope:{ctx:A}}}),U=new z({props:{id:"precision",option:"fp16",$$slots:{default:[eJ]},$$scope:{ctx:A}}}),n=new z({props:{id:"precision",option:"bf16",$$slots:{default:[tJ]},$$scope:{ctx:A}}}),{c(){r(s.$$.fragment),i=a(),r(U.$$.fragment),p=a(),r(n.$$.fragment)},l(y){I(s.$$.fragment,y),i=T(y),I(U.$$.fragment,y),p=T(y),I(n.$$.fragment,y)},m(y,Q){c(s,y,Q),t(y,i,Q),c(U,y,Q),t(y,p,Q),c(n,y,Q),J=!0},p(y,Q){const f={};Q&2&&(f.$$scope={dirty:Q,ctx:y}),s.$set(f);const R={};Q&2&&(R.$$scope={dirty:Q,ctx:y}),U.$set(R);const E={};Q&2&&(E.$$scope={dirty:Q,ctx:y}),n.$set(E)},i(y){J||(u(s.$$.fragment,y),u(U.$$.fragment,y),u(n.$$.fragment,y),J=!0)},o(y){m(s.$$.fragment,y),m(U.$$.fragment,y),m(n.$$.fragment,y),J=!1},d(y){y&&(e(i),e(p)),d(s,y),d(U,y),d(n,y)}}}function MJ(A){let s,i="여러 GPU에 DeepSpeed를 배포하려면 <code>--num_gpus</code> 매개변수를 추가하세요. 사용 가능한 모든 GPU를 사용하려는 경우 <code>--num_gpus</code>를 추가할 필요가 없습니다. 아래 예제에서는 2개의 GPU를 사용합니다.",U,p,n;return p=new g({props:{code:"ZGVlcHNwZWVkJTIwLS1udW1fZ3B1cyUzRDIlMjBleGFtcGxlcyUyRnB5dG9yY2glMkZ0cmFuc2xhdGlvbiUyRnJ1bl90cmFuc2xhdGlvbi5weSUyMCU1QyUwQS0tZGVlcHNwZWVkJTIwdGVzdHMlMkZkZWVwc3BlZWQlMkZkc19jb25maWdfemVybzMuanNvbiUyMCU1QyUwQS0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwZ29vZ2xlLXQ1JTJGdDUtc21hbGwlMjAtLXBlcl9kZXZpY2VfdHJhaW5fYmF0Y2hfc2l6ZSUyMDElMjAlNUMlMEEtLW91dHB1dF9kaXIlMjBvdXRwdXRfZGlyJTIwLS1vdmVyd3JpdGVfb3V0cHV0X2RpciUyMC0tZnAxNiUyMCU1QyUwQS0tZG9fdHJhaW4lMjAtLW1heF90cmFpbl9zYW1wbGVzJTIwNTAwJTIwLS1udW1fdHJhaW5fZXBvY2hzJTIwMSUyMCU1QyUwQS0tZGF0YXNldF9uYW1lJTIwd210MTYlMjAtLWRhdGFzZXRfY29uZmlnJTIwJTIycm8tZW4lMjIlMjAlNUMlMEEtLXNvdXJjZV9sYW5nJTIwZW4lMjAtLXRhcmdldF9sYW5nJTIwcm8=",highlighted:`deepspeed --num_gpus=2 examples/pytorch/translation/run_translation.py \\ | |
| --deepspeed tests/deepspeed/ds_config_zero3.json \\ | |
| --model_name_or_path google-t5/t5-small --per_device_train_batch_size 1 \\ | |
| --output_dir output_dir --overwrite_output_dir --fp16 \\ | |
| --do_train --max_train_samples 500 --num_train_epochs 1 \\ | |
| --dataset_name wmt16 --dataset_config <span class="hljs-string">"ro-en"</span> \\ | |
| --source_lang en --target_lang ro`,wrap:!1}}),{c(){s=o("p"),s.innerHTML=i,U=a(),r(p.$$.fragment)},l(J){s=j(J,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1lsqf1i"&&(s.innerHTML=i),U=T(J),I(p.$$.fragment,J)},m(J,y){t(J,s,y),t(J,U,y),c(p,J,y),n=!0},p:W,i(J){n||(u(p.$$.fragment,J),n=!0)},o(J){m(p.$$.fragment,J),n=!1},d(J){J&&(e(s),e(U)),d(p,J)}}}function UJ(A){let s,i='단일 GPU에서 더 나은 성능을 얻으려면 <a href="#zero-configuration">ZeRO-2</a> 구성 파일에서 <code>allgather_bucket_size</code> 및 <code>reduce_bucket_size</code> 값을 2e8로 설정하세요.';return{c(){s=o("p"),s.innerHTML=i},l(U){s=j(U,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1y8wlwx"&&(s.innerHTML=i)},m(U,p){t(U,s,p)},p:W,d(U){U&&e(s)}}}function nJ(A){let s,i="단일 GPU에 DeepSpeed를 배포하려면 <code>--num_gpus</code> 매개변수를 추가하세요. GPU가 1개만 있는 경우 이 값을 명시적으로 설정할 필요는 없습니다. DeepSpeed는 지정된 노드에서 볼 수 있는 모든 GPU를 배포하므로 이 값을 명시적으로 설정할 필요는 없습니다.",U,p,n,J,y="DeepSpeed는 단 하나의 GPU로도 여전히 유용합니다:",Q,f,R="<li>일부 계산과 메모리를 CPU로 오프로드하여 더 큰 배치 크기를 사용하거나 일반적으로 맞지 않는 매우 큰 모델을 맞추기 위해 모델에 더 많은 GPU 리소스를 사용할 수 있도록 합니다.</li> <li>스마트 GPU 메모리 관리 시스템으로 메모리 조각화를 최소화하여 더 큰 모델과 데이터 배치에 맞출 수 있습니다.</li>",E,b,Z;return p=new g({props:{code:"ZGVlcHNwZWVkJTIwLS1udW1fZ3B1cyUzRDElMjBleGFtcGxlcyUyRnB5dG9yY2glMkZ0cmFuc2xhdGlvbiUyRnJ1bl90cmFuc2xhdGlvbi5weSUyMCU1QyUwQS0tZGVlcHNwZWVkJTIwdGVzdHMlMkZkZWVwc3BlZWQlMkZkc19jb25maWdfemVybzIuanNvbiUyMCU1QyUwQS0tbW9kZWxfbmFtZV9vcl9wYXRoJTIwZ29vZ2xlLXQ1JTJGdDUtc21hbGwlMjAtLXBlcl9kZXZpY2VfdHJhaW5fYmF0Y2hfc2l6ZSUyMDElMjAlNUMlMEEtLW91dHB1dF9kaXIlMjBvdXRwdXRfZGlyJTIwLS1vdmVyd3JpdGVfb3V0cHV0X2RpciUyMC0tZnAxNiUyMCU1QyUwQS0tZG9fdHJhaW4lMjAtLW1heF90cmFpbl9zYW1wbGVzJTIwNTAwJTIwLS1udW1fdHJhaW5fZXBvY2hzJTIwMSUyMCU1QyUwQS0tZGF0YXNldF9uYW1lJTIwd210MTYlMjAtLWRhdGFzZXRfY29uZmlnJTIwJTIycm8tZW4lMjIlMjAlNUMlMEEtLXNvdXJjZV9sYW5nJTIwZW4lMjAtLXRhcmdldF9sYW5nJTIwcm8=",highlighted:`deepspeed --num_gpus=1 examples/pytorch/translation/run_translation.py \\ | |
| --deepspeed tests/deepspeed/ds_config_zero2.json \\ | |
| --model_name_or_path google-t5/t5-small --per_device_train_batch_size 1 \\ | |
| --output_dir output_dir --overwrite_output_dir --fp16 \\ | |
| --do_train --max_train_samples 500 --num_train_epochs 1 \\ | |
| --dataset_name wmt16 --dataset_config <span class="hljs-string">"ro-en"</span> \\ | |
| --source_lang en --target_lang ro`,wrap:!1}}),b=new al({props:{$$slots:{default:[UJ]},$$scope:{ctx:A}}}),{c(){s=o("p"),s.innerHTML=i,U=a(),r(p.$$.fragment),n=a(),J=o("p"),J.textContent=y,Q=a(),f=o("ol"),f.innerHTML=R,E=a(),r(b.$$.fragment)},l(w){s=j(w,"P",{"data-svelte-h":!0}),C(s)!=="svelte-tq3q0i"&&(s.innerHTML=i),U=T(w),I(p.$$.fragment,w),n=T(w),J=j(w,"P",{"data-svelte-h":!0}),C(J)!=="svelte-452bib"&&(J.textContent=y),Q=T(w),f=j(w,"OL",{"data-svelte-h":!0}),C(f)!=="svelte-oazmit"&&(f.innerHTML=R),E=T(w),I(b.$$.fragment,w)},m(w,B){t(w,s,B),t(w,U,B),c(p,w,B),t(w,n,B),t(w,J,B),t(w,Q,B),t(w,f,B),t(w,E,B),c(b,w,B),Z=!0},p(w,B){const S={};B&2&&(S.$$scope={dirty:B,ctx:w}),b.$set(S)},i(w){Z||(u(p.$$.fragment,w),u(b.$$.fragment,w),Z=!0)},o(w){m(p.$$.fragment,w),m(b.$$.fragment,w),Z=!1},d(w){w&&(e(s),e(U),e(n),e(J),e(Q),e(f),e(E)),d(p,w),d(b,w)}}}function JJ(A){let s,i,U,p;return s=new z({props:{id:"deploy",option:"multi-GPU",$$slots:{default:[MJ]},$$scope:{ctx:A}}}),U=new z({props:{id:"deploy",option:"single-GPU",$$slots:{default:[nJ]},$$scope:{ctx:A}}}),{c(){r(s.$$.fragment),i=a(),r(U.$$.fragment)},l(n){I(s.$$.fragment,n),i=T(n),I(U.$$.fragment,n)},m(n,J){c(s,n,J),t(n,i,J),c(U,n,J),p=!0},p(n,J){const y={};J&2&&(y.$$scope={dirty:J,ctx:n}),s.$set(y);const Q={};J&2&&(Q.$$scope={dirty:J,ctx:n}),U.$set(Q)},i(n){p||(u(s.$$.fragment,n),u(U.$$.fragment,n),p=!0)},o(n){m(s.$$.fragment,n),m(U.$$.fragment,n),p=!1},d(n){n&&e(i),d(s,n),d(U,n)}}}function aJ(A){let s,i='<a href="https://pytorch.org/docs/stable/elastic/run.html" rel="nofollow">torchrun</a>의 경우, 각 노드에 ssh로 접속한 후 두 노드 모두에서 다음 명령을 실행해야 합니다. 런처는 두 노드가 동기화될 때까지 기다렸다가 트레이닝을 시작합니다.',U,p,n;return p=new g({props:{code:"dG9yY2hydW4lMjAtLW5wcm9jX3Blcl9ub2RlJTNEOCUyMC0tbm5vZGUlM0QyJTIwLS1ub2RlX3JhbmslM0QwJTIwLS1tYXN0ZXJfYWRkciUzRGhvc3RuYW1lMSUyMCU1QyUwQS0tbWFzdGVyX3BvcnQlM0Q5OTAxJTIweW91cl9wcm9ncmFtLnB5JTIwJTNDbm9ybWFsJTIwY2wlMjBhcmdzJTNFJTIwLS1kZWVwc3BlZWQlMjBkc19jb25maWcuanNvbg==",highlighted:`torchrun --nproc_per_node=8 --nnode=2 --node_rank=0 --master_addr=hostname1 \\ | |
| --master_port=9901 your_program.py <normal cl args> --deepspeed ds_config.json`,wrap:!1}}),{c(){s=o("p"),s.innerHTML=i,U=a(),r(p.$$.fragment)},l(J){s=j(J,"P",{"data-svelte-h":!0}),C(s)!=="svelte-iooldj"&&(s.innerHTML=i),U=T(J),I(p.$$.fragment,J)},m(J,y){t(J,s,y),t(J,U,y),c(p,J,y),n=!0},p:W,i(J){n||(u(p.$$.fragment,J),n=!0)},o(J){m(p.$$.fragment,J),n=!1},d(J){J&&(e(s),e(U)),d(p,J)}}}function TJ(A){let s,i="<code>deepspeed</code> 런처의 경우, 먼저 <code>hostfile</code>을 생성합니다.",U,p,n,J,y="그런 다음 다음 명령어로 트레이닝을 시작할 수 있습니다. <code>deepspeed</code> 런처는 두 노드에서 동시에 명령을 자동으로 실행합니다.",Q,f,R,E,b='다중 노드 컴퓨팅 리소스 구성에 대한 자세한 내용은 <a href="https://www.deepspeed.ai/getting-started/#resource-configuration-multi-node" rel="nofollow">Resource Configuration (multi-node)</a> 가이드를 참조하세요.',Z;return p=new g({props:{code:"aG9zdG5hbWUxJTIwc2xvdHMlM0Q4JTBBaG9zdG5hbWUyJTIwc2xvdHMlM0Q4",highlighted:`hostname1 slots=8 | |
| hostname2 slots=8`,wrap:!1}}),f=new g({props:{code:"ZGVlcHNwZWVkJTIwLS1udW1fZ3B1cyUyMDglMjAtLW51bV9ub2RlcyUyMDIlMjAtLWhvc3RmaWxlJTIwaG9zdGZpbGUlMjAtLW1hc3Rlcl9hZGRyJTIwaG9zdG5hbWUxJTIwLS1tYXN0ZXJfcG9ydCUzRDk5MDElMjAlNUMlMEF5b3VyX3Byb2dyYW0ucHklMjAlM0Nub3JtYWwlMjBjbCUyMGFyZ3MlM0UlMjAtLWRlZXBzcGVlZCUyMGRzX2NvbmZpZy5qc29u",highlighted:`deepspeed --num_gpus 8 --num_nodes 2 --hostfile hostfile --master_addr hostname1 --master_port=9901 \\ | |
| your_program.py <normal cl args> --deepspeed ds_config.json`,wrap:!1}}),{c(){s=o("p"),s.innerHTML=i,U=a(),r(p.$$.fragment),n=a(),J=o("p"),J.innerHTML=y,Q=a(),r(f.$$.fragment),R=a(),E=o("p"),E.innerHTML=b},l(w){s=j(w,"P",{"data-svelte-h":!0}),C(s)!=="svelte-12t5um9"&&(s.innerHTML=i),U=T(w),I(p.$$.fragment,w),n=T(w),J=j(w,"P",{"data-svelte-h":!0}),C(J)!=="svelte-1832ezh"&&(J.innerHTML=y),Q=T(w),I(f.$$.fragment,w),R=T(w),E=j(w,"P",{"data-svelte-h":!0}),C(E)!=="svelte-15phe24"&&(E.innerHTML=b)},m(w,B){t(w,s,B),t(w,U,B),c(p,w,B),t(w,n,B),t(w,J,B),t(w,Q,B),c(f,w,B),t(w,R,B),t(w,E,B),Z=!0},p:W,i(w){Z||(u(p.$$.fragment,w),u(f.$$.fragment,w),Z=!0)},o(w){m(p.$$.fragment,w),m(f.$$.fragment,w),Z=!1},d(w){w&&(e(s),e(U),e(n),e(J),e(Q),e(R),e(E)),d(p,w),d(f,w)}}}function pJ(A){let s,i,U,p;return s=new z({props:{id:"multinode",option:"torchrun",$$slots:{default:[aJ]},$$scope:{ctx:A}}}),U=new z({props:{id:"multinode",option:"deepspeed",$$slots:{default:[TJ]},$$scope:{ctx:A}}}),{c(){r(s.$$.fragment),i=a(),r(U.$$.fragment)},l(n){I(s.$$.fragment,n),i=T(n),I(U.$$.fragment,n)},m(n,J){c(s,n,J),t(n,i,J),c(U,n,J),p=!0},p(n,J){const y={};J&2&&(y.$$scope={dirty:J,ctx:n}),s.$set(y);const Q={};J&2&&(Q.$$scope={dirty:J,ctx:n}),U.$set(Q)},i(n){p||(u(s.$$.fragment,n),u(U.$$.fragment,n),p=!0)},o(n){m(s.$$.fragment,n),m(U.$$.fragment,n),p=!1},d(n){n&&e(i),d(s,n),d(U,n)}}}function yJ(A){let s,i="ZeRO-2로 훈련된 모델은 pytorch_model.bin 가중치를 fp16에 저장합니다. ZeRO-3으로 훈련된 모델의 모델 가중치를 fp16에 저장하려면 모델 가중치가 여러 GPU에 분할되어 있으므로 <code>“stage3_gather_16bit_weights_on_model_save”: true</code>를 설정해야 합니다. 그렇지 않으면 <code>Trainer</code>가 가중치를 fp16에 저장하지 않고 pytorch_model.bin 파일을 생성하지 않습니다. 이는 DeepSpeed의 state_dict에 실제 가중치 대신 플레이스홀더가 포함되어 있어 이를 로드할 수 없기 때문입니다.",U,p,n;return p=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyemVyb19vcHRpbWl6YXRpb24lMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFnZTNfZ2F0aGVyXzE2Yml0X3dlaWdodHNfb25fbW9kZWxfc2F2ZSUyMiUzQSUyMHRydWUlMEElMjAlMjAlMjAlMjAlN0QlMEElN0Q=",highlighted:`{ | |
| <span class="hljs-attr">"zero_optimization":</span> { | |
| <span class="hljs-attr">"stage3_gather_16bit_weights_on_model_save":</span> <span class="hljs-literal">true</span> | |
| } | |
| }`,wrap:!1}}),{c(){s=o("p"),s.innerHTML=i,U=a(),r(p.$$.fragment)},l(J){s=j(J,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1u8kmrr"&&(s.innerHTML=i),U=T(J),I(p.$$.fragment,J)},m(J,y){t(J,s,y),t(J,U,y),c(p,J,y),n=!0},p:W,i(J){n||(u(p.$$.fragment,J),n=!0)},o(J){m(p.$$.fragment,J),n=!1},d(J){J&&(e(s),e(U)),d(p,J)}}}function iJ(A){let s,i="<code>load_state_dict_from_zero_checkpoint</code>가 실행되면 동일한 애플리케이션의 컨텍스트에서 모델을 더 이상 DeepSpeed에서 사용할 수 없습니다. <code>model.load_state_dict(state_dict)</code>는 모든 딥스피드 마법을 제거하므로 딥스피드 엔진을 다시 초기화해야 합니다. 이 기능은 훈련이 끝날 때만 사용하세요.";return{c(){s=o("p"),s.innerHTML=i},l(U){s=j(U,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1dng6id"&&(s.innerHTML=i)},m(U,p){t(U,s,p)},p:W,d(U){U&&e(s)}}}function oJ(A){let s,i="자세한 사용법은 <code>python zero_to_fp32.py -h</code>를 실행하세요. 이 스크립트에는 최종 fp32 가중치의 2배의 일반 RAM이 필요합니다.";return{c(){s=o("p"),s.innerHTML=i},l(U){s=j(U,"P",{"data-svelte-h":!0}),C(s)!=="svelte-10dkyr5"&&(s.innerHTML=i)},m(U,p){t(U,s,p)},p:W,d(U){U&&e(s)}}}function jJ(A){let s,i="전체 정밀 가중치는 많은 메모리가 필요할 수 있으므로 트레이닝 중에 저장해서는 안 됩니다. 일반적으로 훈련이 완료된 후 오프라인으로 fp32 가중치를 저장하는 것이 가장 좋습니다. 그러나 여유 CPU 메모리가 많은 경우 훈련 중에 fp32 가중치를 저장할 수 있습니다. 이 섹션에서는 온라인과 오프라인 방식을 모두 다룹니다.",U,p,n,J,y="다음과 같이 최신 체크포인트를 로드하려면 체크포인트를 하나 이상 저장해야 합니다:",Q,f,R,E,b="<code>--load_best_model_at_end</code> 매개변수를 활성화하여 <code>TrainingArguments</code>에서 최적의 체크포인트를 추적하는 경우, 먼저 학습을 완료하고 최종 모델을 명시적으로 저장할 수 있습니다. 그런 다음 아래와 같이 다시 로드할 수 있습니다:",Z,w,B,S,O,_,h="fp32 가중치의 state_dict를 추출하여 로드할 수도 있습니다:",D,G,Y,L,H,X,K="DeepSpeed는 언제든지 가중치를 추출할 수 있도록 체크포인트 폴더의 최상위 레벨에 zero_to_fp32.py 스크립트를 제공합니다. 이 스크립트는 독립형 스크립트로 구성 파일이나 <code>Trainer</code>가 필요하지 않습니다.",ll,q,el="예를 들어 체크포인트 폴더가 다음과 같은 경우입니다:",v,P,$,F,sl="딥스피드 체크포인트(ZeRO-2 또는 ZeRO-3) 하위 폴더 <code>global_step1</code>에서 fp32 가중치를 재구성하려면 다음 명령을 실행하여 여러 GPU의 전체 fp32 가중치를 단일 pytorch_model.bin 파일로 생성하고 통합합니다. 스크립트는 자동으로 체크포인트가 포함된 하위 폴더를 찾습니다.",tl,Ml,nl,x,ml;return p=new k({props:{title:"온라인 환경",local:"online",headingTag:"h3"}}),f=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycy50cmFpbmVyX3V0aWxzJTIwaW1wb3J0JTIwZ2V0X2xhc3RfY2hlY2twb2ludCUwQWZyb20lMjBkZWVwc3BlZWQudXRpbHMuemVyb190b19mcDMyJTIwaW1wb3J0JTIwbG9hZF9zdGF0ZV9kaWN0X2Zyb21femVyb19jaGVja3BvaW50JTBBJTBBY2hlY2twb2ludF9kaXIlMjAlM0QlMjBnZXRfbGFzdF9jaGVja3BvaW50KHRyYWluZXIuYXJncy5vdXRwdXRfZGlyKSUwQWZwMzJfbW9kZWwlMjAlM0QlMjBsb2FkX3N0YXRlX2RpY3RfZnJvbV96ZXJvX2NoZWNrcG9pbnQodHJhaW5lci5tb2RlbCUyQyUyMGNoZWNrcG9pbnRfZGlyKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers.trainer_utils <span class="hljs-keyword">import</span> get_last_checkpoint | |
| <span class="hljs-keyword">from</span> deepspeed.utils.zero_to_fp32 <span class="hljs-keyword">import</span> load_state_dict_from_zero_checkpoint | |
| checkpoint_dir = get_last_checkpoint(trainer.args.output_dir) | |
| fp32_model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)`,wrap:!1}}),w=new g({props:{code:"ZnJvbSUyMGRlZXBzcGVlZC51dGlscy56ZXJvX3RvX2ZwMzIlMjBpbXBvcnQlMjBsb2FkX3N0YXRlX2RpY3RfZnJvbV96ZXJvX2NoZWNrcG9pbnQlMEElMEFjaGVja3BvaW50X2RpciUyMCUzRCUyMG9zLnBhdGguam9pbih0cmFpbmVyLmFyZ3Mub3V0cHV0X2RpciUyQyUyMCUyMmNoZWNrcG9pbnQtZmluYWwlMjIpJTBBdHJhaW5lci5kZWVwc3BlZWQuc2F2ZV9jaGVja3BvaW50KGNoZWNrcG9pbnRfZGlyKSUwQWZwMzJfbW9kZWwlMjAlM0QlMjBsb2FkX3N0YXRlX2RpY3RfZnJvbV96ZXJvX2NoZWNrcG9pbnQodHJhaW5lci5tb2RlbCUyQyUyMGNoZWNrcG9pbnRfZGlyKQ==",highlighted:`<span class="hljs-keyword">from</span> deepspeed.utils.zero_to_fp32 <span class="hljs-keyword">import</span> load_state_dict_from_zero_checkpoint | |
| checkpoint_dir = os.path.join(trainer.args.output_dir, <span class="hljs-string">"checkpoint-final"</span>) | |
| trainer.deepspeed.save_checkpoint(checkpoint_dir) | |
| fp32_model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)`,wrap:!1}}),S=new al({props:{$$slots:{default:[iJ]},$$scope:{ctx:A}}}),G=new g({props:{code:"ZnJvbSUyMGRlZXBzcGVlZC51dGlscy56ZXJvX3RvX2ZwMzIlMjBpbXBvcnQlMjBnZXRfZnAzMl9zdGF0ZV9kaWN0X2Zyb21femVyb19jaGVja3BvaW50JTBBJTBBc3RhdGVfZGljdCUyMCUzRCUyMGdldF9mcDMyX3N0YXRlX2RpY3RfZnJvbV96ZXJvX2NoZWNrcG9pbnQoY2hlY2twb2ludF9kaXIpJTIwJTIwJTIzJTIwY3B1JUVDJTk3JTkwJTIwJUVDJTlEJUI0JUVCJUFGJUI4JTIwJUVDJUExJUI0JUVDJTlFJUFDJUVEJTk1JUE4JTBBbW9kZWwlMjAlM0QlMjBtb2RlbC5jcHUoKSUwQW1vZGVsLmxvYWRfc3RhdGVfZGljdChzdGF0ZV9kaWN0KQ==",highlighted:`<span class="hljs-keyword">from</span> deepspeed.utils.zero_to_fp32 <span class="hljs-keyword">import</span> get_fp32_state_dict_from_zero_checkpoint | |
| state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) <span class="hljs-comment"># cpu에 이미 존재함</span> | |
| model = model.cpu() | |
| model.load_state_dict(state_dict)`,wrap:!1}}),L=new k({props:{title:"오프라인 환경",local:"offline",headingTag:"h3"}}),P=new g({props:{code:"JTI0JTIwbHMlMjAtbCUyMG91dHB1dF9kaXIlMkZjaGVja3BvaW50LTElMkYlMEEtcnctcnctci0tJTIwMSUyMHN0YXMlMjBzdGFzJTIwMS40SyUyME1hciUyMDI3JTIwMjAlM0E0MiUyMGNvbmZpZy5qc29uJTBBZHJ3eHJ3eHIteCUyMDIlMjBzdGFzJTIwc3RhcyUyMDQuMEslMjBNYXIlMjAyNSUyMDE5JTNBNTIlMjBnbG9iYWxfc3RlcDElMkYlMEEtcnctcnctci0tJTIwMSUyMHN0YXMlMjBzdGFzJTIwJTIwJTIwMTIlMjBNYXIlMjAyNyUyMDEzJTNBMTYlMjBsYXRlc3QlMEEtcnctcnctci0tJTIwMSUyMHN0YXMlMjBzdGFzJTIwODI3SyUyME1hciUyMDI3JTIwMjAlM0E0MiUyMG9wdGltaXplci5wdCUwQS1ydy1ydy1yLS0lMjAxJTIwc3RhcyUyMHN0YXMlMjAyMzFNJTIwTWFyJTIwMjclMjAyMCUzQTQyJTIwcHl0b3JjaF9tb2RlbC5iaW4lMEEtcnctcnctci0tJTIwMSUyMHN0YXMlMjBzdGFzJTIwJTIwNjIzJTIwTWFyJTIwMjclMjAyMCUzQTQyJTIwc2NoZWR1bGVyLnB0JTBBLXJ3LXJ3LXItLSUyMDElMjBzdGFzJTIwc3RhcyUyMDEuOEslMjBNYXIlMjAyNyUyMDIwJTNBNDIlMjBzcGVjaWFsX3Rva2Vuc19tYXAuanNvbiUwQS1ydy1ydy1yLS0lMjAxJTIwc3RhcyUyMHN0YXMlMjA3NzRLJTIwTWFyJTIwMjclMjAyMCUzQTQyJTIwc3BpZWNlLm1vZGVsJTBBLXJ3LXJ3LXItLSUyMDElMjBzdGFzJTIwc3RhcyUyMDEuOUslMjBNYXIlMjAyNyUyMDIwJTNBNDIlMjB0b2tlbml6ZXJfY29uZmlnLmpzb24lMEEtcnctcnctci0tJTIwMSUyMHN0YXMlMjBzdGFzJTIwJTIwMzM5JTIwTWFyJTIwMjclMjAyMCUzQTQyJTIwdHJhaW5lcl9zdGF0ZS5qc29uJTBBLXJ3LXJ3LXItLSUyMDElMjBzdGFzJTIwc3RhcyUyMDIuM0slMjBNYXIlMjAyNyUyMDIwJTNBNDIlMjB0cmFpbmluZ19hcmdzLmJpbiUwQS1yd3hydy1yLS0lMjAxJTIwc3RhcyUyMHN0YXMlMjA1LjVLJTIwTWFyJTIwMjclMjAxMyUzQTE2JTIwemVyb190b19mcDMyLnB5Kg==",highlighted:`$ <span class="hljs-built_in">ls</span> -l output_dir/checkpoint-1/ | |
| -rw-rw-r-- 1 stas stas 1.4K Mar 27 20:42 config.json | |
| drwxrwxr-x 2 stas stas 4.0K Mar 25 19:52 global_step1/ | |
| -rw-rw-r-- 1 stas stas 12 Mar 27 13:16 latest | |
| -rw-rw-r-- 1 stas stas 827K Mar 27 20:42 optimizer.pt | |
| -rw-rw-r-- 1 stas stas 231M Mar 27 20:42 pytorch_model.bin | |
| -rw-rw-r-- 1 stas stas 623 Mar 27 20:42 scheduler.pt | |
| -rw-rw-r-- 1 stas stas 1.8K Mar 27 20:42 special_tokens_map.json | |
| -rw-rw-r-- 1 stas stas 774K Mar 27 20:42 spiece.model | |
| -rw-rw-r-- 1 stas stas 1.9K Mar 27 20:42 tokenizer_config.json | |
| -rw-rw-r-- 1 stas stas 339 Mar 27 20:42 trainer_state.json | |
| -rw-rw-r-- 1 stas stas 2.3K Mar 27 20:42 training_args.bin | |
| -rwxrw-r-- 1 stas stas 5.5K Mar 27 13:16 zero_to_fp32.py*`,wrap:!1}}),Ml=new g({props:{code:"cHl0aG9uJTIwemVyb190b19mcDMyLnB5JTIwLiUyMHB5dG9yY2hfbW9kZWwuYmlu",highlighted:'python zero_to_fp32.py . pytorch_model.<span class="hljs-built_in">bin</span>',wrap:!1}}),x=new al({props:{$$slots:{default:[oJ]},$$scope:{ctx:A}}}),{c(){s=o("p"),s.textContent=i,U=a(),r(p.$$.fragment),n=a(),J=o("p"),J.textContent=y,Q=a(),r(f.$$.fragment),R=a(),E=o("p"),E.innerHTML=b,Z=a(),r(w.$$.fragment),B=a(),r(S.$$.fragment),O=a(),_=o("p"),_.textContent=h,D=a(),r(G.$$.fragment),Y=a(),r(L.$$.fragment),H=a(),X=o("p"),X.innerHTML=K,ll=a(),q=o("p"),q.textContent=el,v=a(),r(P.$$.fragment),$=a(),F=o("p"),F.innerHTML=sl,tl=a(),r(Ml.$$.fragment),nl=a(),r(x.$$.fragment)},l(V){s=j(V,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1xam4d0"&&(s.textContent=i),U=T(V),I(p.$$.fragment,V),n=T(V),J=j(V,"P",{"data-svelte-h":!0}),C(J)!=="svelte-1e2hpou"&&(J.textContent=y),Q=T(V),I(f.$$.fragment,V),R=T(V),E=j(V,"P",{"data-svelte-h":!0}),C(E)!=="svelte-fej7o2"&&(E.innerHTML=b),Z=T(V),I(w.$$.fragment,V),B=T(V),I(S.$$.fragment,V),O=T(V),_=j(V,"P",{"data-svelte-h":!0}),C(_)!=="svelte-o7ep8v"&&(_.textContent=h),D=T(V),I(G.$$.fragment,V),Y=T(V),I(L.$$.fragment,V),H=T(V),X=j(V,"P",{"data-svelte-h":!0}),C(X)!=="svelte-8kvr23"&&(X.innerHTML=K),ll=T(V),q=j(V,"P",{"data-svelte-h":!0}),C(q)!=="svelte-1vnc9y9"&&(q.textContent=el),v=T(V),I(P.$$.fragment,V),$=T(V),F=j(V,"P",{"data-svelte-h":!0}),C(F)!=="svelte-woivpp"&&(F.innerHTML=sl),tl=T(V),I(Ml.$$.fragment,V),nl=T(V),I(x.$$.fragment,V)},m(V,N){t(V,s,N),t(V,U,N),c(p,V,N),t(V,n,N),t(V,J,N),t(V,Q,N),c(f,V,N),t(V,R,N),t(V,E,N),t(V,Z,N),c(w,V,N),t(V,B,N),c(S,V,N),t(V,O,N),t(V,_,N),t(V,D,N),c(G,V,N),t(V,Y,N),c(L,V,N),t(V,H,N),t(V,X,N),t(V,ll,N),t(V,q,N),t(V,v,N),c(P,V,N),t(V,$,N),t(V,F,N),t(V,tl,N),c(Ml,V,N),t(V,nl,N),c(x,V,N),ml=!0},p(V,N){const dl={};N&2&&(dl.$$scope={dirty:N,ctx:V}),S.$set(dl);const Ul={};N&2&&(Ul.$$scope={dirty:N,ctx:V}),x.$set(Ul)},i(V){ml||(u(p.$$.fragment,V),u(f.$$.fragment,V),u(w.$$.fragment,V),u(S.$$.fragment,V),u(G.$$.fragment,V),u(L.$$.fragment,V),u(P.$$.fragment,V),u(Ml.$$.fragment,V),u(x.$$.fragment,V),ml=!0)},o(V){m(p.$$.fragment,V),m(f.$$.fragment,V),m(w.$$.fragment,V),m(S.$$.fragment,V),m(G.$$.fragment,V),m(L.$$.fragment,V),m(P.$$.fragment,V),m(Ml.$$.fragment,V),m(x.$$.fragment,V),ml=!1},d(V){V&&(e(s),e(U),e(n),e(J),e(Q),e(R),e(E),e(Z),e(B),e(O),e(_),e(D),e(Y),e(H),e(X),e(ll),e(q),e(v),e($),e(F),e(tl),e(nl)),d(p,V),d(f,V),d(w,V),d(S,V),d(G,V),d(L,V),d(P,V),d(Ml,V),d(x,V)}}}function CJ(A){let s,i,U,p;return s=new z({props:{id:"save",option:"fp16",$$slots:{default:[yJ]},$$scope:{ctx:A}}}),U=new z({props:{id:"save",option:"fp32",$$slots:{default:[jJ]},$$scope:{ctx:A}}}),{c(){r(s.$$.fragment),i=a(),r(U.$$.fragment)},l(n){I(s.$$.fragment,n),i=T(n),I(U.$$.fragment,n)},m(n,J){c(s,n,J),t(n,i,J),c(U,n,J),p=!0},p(n,J){const y={};J&2&&(y.$$scope={dirty:J,ctx:n}),s.$set(y);const Q={};J&2&&(Q.$$scope={dirty:J,ctx:n}),U.$set(Q)},i(n){p||(u(s.$$.fragment,n),u(U.$$.fragment,n),p=!0)},o(n){m(s.$$.fragment,n),m(U.$$.fragment,n),p=!1},d(n){n&&e(i),d(s,n),d(U,n)}}}function wJ(A){let s,i='모든 것이 자동으로 처리되기를 원한다면, <code>Trainer</code>와 함께 DeepSpeed를 사용해 보세요! <a href="https://www.deepspeed.ai/" rel="nofollow">DeepSpeed 문서</a>를 참조하여 설정 파일에서 매개변수 값을 수동으로 구성해야 합니다(<code>"auto"</code> 값은 사용할 수 없음).';return{c(){s=o("p"),s.innerHTML=i},l(U){s=j(U,"P",{"data-svelte-h":!0}),C(s)!=="svelte-kevebx"&&(s.innerHTML=i)},m(U,p){t(U,s,p)},p:W,d(U){U&&e(s)}}}function rJ(A){let s,i;return s=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycy5pbnRlZ3JhdGlvbnMlMjBpbXBvcnQlMjBIZkRlZXBTcGVlZENvbmZpZyUwQWZyb20lMjB0cmFuc2Zvcm1lcnMlMjBpbXBvcnQlMjBBdXRvTW9kZWwlMEFpbXBvcnQlMjBkZWVwc3BlZWQlMEElMEFkc19jb25maWclMjAlM0QlMjAlN0IuLi4lN0QlMjAlMjAlMjMlMjBkZWVwc3BlZWQlMjAlRUMlODQlQTQlRUMlQTAlOTUlMjAlRUElQjAlOUQlRUMlQjIlQjQlMjAlRUIlOTglOTAlRUIlOEElOTQlMjAlRUQlOEMlOEMlRUMlOUQlQkMlMjAlRUElQjIlQkQlRUIlQTElOUMlMEElMjMlMjBaZXJvJTIwMyVFQiVBNSVCQyUyMCVFQSVCMCU5MCVFQyVBNyU4MCVFRCU5NSU5OCVFQSVCOCVCMCUyMCVFQyU5QyU4NCVFRCU5NSVCNCUyMCVFQiVBQSVBOCVFQiU4RCVCOCVFQyU5RCU4NCUyMCVFQyU5RCVCOCVFQyU4QSVBNCVFRCU4NCVCNCVFQyU4QSVBNCVFRCU5OSU5NCVFRCU5NSU5OCVFQSVCOCVCMCUyMCVFQyVBMCU4NCVFQyU5NyU5MCUyMCVFQiVCMCU5OCVFQiU5MyU5QyVFQyU4QiU5QyUyMCVFQyU4QiVBNCVFRCU5NiU4OSVFRCU5NSVCNCVFQyU5NSVCQyUyMCVFRCU5NSVBOSVFQiU4QiU4OCVFQiU4QiVBNCUwQWRzY2hmJTIwJTNEJTIwSGZEZWVwU3BlZWRDb25maWcoZHNfY29uZmlnKSUyMCUyMCUyMyUyMCVFQyU5RCVCNCUyMCVFQSVCMCU5RCVFQyVCMiVCNCVFQiVBNSVCQyUyMCVFQyU5QyVBMCVFQyVBNyU4MCVFRCU5NSU5OCVFQyU4NCVCOCVFQyU5QSU5NC4lMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbC5mcm9tX3ByZXRyYWluZWQoJTIyb3BlbmFpLWNvbW11bml0eSUyRmdwdDIlMjIpJTBBZW5naW5lJTIwJTNEJTIwZGVlcHNwZWVkLmluaXRpYWxpemUobW9kZWwlM0Rtb2RlbCUyQyUyMGNvbmZpZ19wYXJhbXMlM0Rkc19jb25maWclMkMlMjAuLi4p",highlighted:`<span class="hljs-keyword">from</span> transformers.integrations <span class="hljs-keyword">import</span> HfDeepSpeedConfig | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModel | |
| <span class="hljs-keyword">import</span> deepspeed | |
| ds_config = {...} <span class="hljs-comment"># deepspeed 설정 객체 또는 파일 경로</span> | |
| <span class="hljs-comment"># Zero 3를 감지하기 위해 모델을 인스턴스화하기 전에 반드시 실행해야 합니다</span> | |
| dschf = HfDeepSpeedConfig(ds_config) <span class="hljs-comment"># 이 객체를 유지하세요.</span> | |
| model = AutoModel.from_pretrained(<span class="hljs-string">"openai-community/gpt2"</span>) | |
| engine = deepspeed.initialize(model=model, config_params=ds_config, ...)`,wrap:!1}}),{c(){r(s.$$.fragment)},l(U){I(s.$$.fragment,U)},m(U,p){c(s,U,p),i=!0},p:W,i(U){i||(u(s.$$.fragment,U),i=!0)},o(U){m(s.$$.fragment,U),i=!1},d(U){d(s,U)}}}function IJ(A){let s,i="<code>HfDeepSpeedConfig</code> is not required for ZeRO-1 or ZeRO-2.",U,p,n;return p=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycy5pbnRlZ3JhdGlvbnMlMjBpbXBvcnQlMjBIZkRlZXBTcGVlZENvbmZpZyUwQWZyb20lMjB0cmFuc2Zvcm1lcnMlMjBpbXBvcnQlMjBBdXRvTW9kZWwlMkMlMjBBdXRvQ29uZmlnJTBBaW1wb3J0JTIwZGVlcHNwZWVkJTBBJTBBZHNfY29uZmlnJTIwJTNEJTIwJTdCLi4uJTdEJTIwJTIwJTIzJTIwZGVlcHNwZWVkJTIwJUVDJTg0JUE0JUVDJUEwJTk1JTIwJUVBJUIwJTlEJUVDJUIyJUI0JTIwJUVCJTk4JTkwJUVCJThBJTk0JTIwJUVEJThDJThDJUVDJTlEJUJDJTIwJUVBJUIyJUJEJUVCJUExJTlDJTBBJTIzJTIwWmVybyUyMDMlRUIlQTUlQkMlMjAlRUElQjAlOTAlRUMlQTclODAlRUQlOTUlOTglRUElQjglQjAlMjAlRUMlOUMlODQlRUQlOTUlQjQlMjAlRUIlQUElQTglRUIlOEQlQjglRUMlOUQlODQlMjAlRUMlOUQlQjglRUMlOEElQTQlRUQlODQlQjQlRUMlOEElQTQlRUQlOTklOTQlRUQlOTUlOTglRUElQjglQjAlMjAlRUMlQTAlODQlRUMlOTclOTAlMjAlRUIlQjAlOTglRUIlOTMlOUMlRUMlOEIlOUMlMjAlRUMlOEIlQTQlRUQlOTYlODklRUQlOTUlQjQlRUMlOTUlQkMlMjAlRUQlOTUlQTklRUIlOEIlODglRUIlOEIlQTQlMEFkc2NoZiUyMCUzRCUyMEhmRGVlcFNwZWVkQ29uZmlnKGRzX2NvbmZpZyklMjAlMjAlMjMlMjAlRUMlOUQlQjQlMjAlRUElQjAlOUQlRUMlQjIlQjQlRUIlQTUlQkMlMjAlRUMlOUMlQTAlRUMlQTclODAlRUQlOTUlOTglRUMlODQlQjglRUMlOUElOTQuJTBBY29uZmlnJTIwJTNEJTIwQXV0b0NvbmZpZy5mcm9tX3ByZXRyYWluZWQoJTIyb3BlbmFpLWNvbW11bml0eSUyRmdwdDIlMjIpJTBBbW9kZWwlMjAlM0QlMjBBdXRvTW9kZWwuZnJvbV9jb25maWcoY29uZmlnKSUwQWVuZ2luZSUyMCUzRCUyMGRlZXBzcGVlZC5pbml0aWFsaXplKG1vZGVsJTNEbW9kZWwlMkMlMjBjb25maWdfcGFyYW1zJTNEZHNfY29uZmlnJTJDJTIwLi4uKQ==",highlighted:`<span class="hljs-keyword">from</span> transformers.integrations <span class="hljs-keyword">import</span> HfDeepSpeedConfig | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModel, AutoConfig | |
| <span class="hljs-keyword">import</span> deepspeed | |
| ds_config = {...} <span class="hljs-comment"># deepspeed 설정 객체 또는 파일 경로</span> | |
| <span class="hljs-comment"># Zero 3를 감지하기 위해 모델을 인스턴스화하기 전에 반드시 실행해야 합니다</span> | |
| dschf = HfDeepSpeedConfig(ds_config) <span class="hljs-comment"># 이 객체를 유지하세요.</span> | |
| config = AutoConfig.from_pretrained(<span class="hljs-string">"openai-community/gpt2"</span>) | |
| model = AutoModel.from_config(config) | |
| engine = deepspeed.initialize(model=model, config_params=ds_config, ...)`,wrap:!1}}),{c(){s=o("p"),s.innerHTML=i,U=a(),r(p.$$.fragment)},l(J){s=j(J,"P",{"data-svelte-h":!0}),C(s)!=="svelte-1i7ih4n"&&(s.innerHTML=i),U=T(J),I(p.$$.fragment,J)},m(J,y){t(J,s,y),t(J,U,y),c(p,J,y),n=!0},p:W,i(J){n||(u(p.$$.fragment,J),n=!0)},o(J){m(p.$$.fragment,J),n=!1},d(J){J&&(e(s),e(U)),d(p,J)}}}function cJ(A){let s,i,U,p;return s=new z({props:{id:"models",option:"pretrained model",$$slots:{default:[rJ]},$$scope:{ctx:A}}}),U=new z({props:{id:"models",option:"non-pretrained model",$$slots:{default:[IJ]},$$scope:{ctx:A}}}),{c(){r(s.$$.fragment),i=a(),r(U.$$.fragment)},l(n){I(s.$$.fragment,n),i=T(n),I(U.$$.fragment,n)},m(n,J){c(s,n,J),t(n,i,J),c(U,n,J),p=!0},p(n,J){const y={};J&2&&(y.$$scope={dirty:J,ctx:n}),s.$set(y);const Q={};J&2&&(Q.$$scope={dirty:J,ctx:n}),U.$set(Q)},i(n){p||(u(s.$$.fragment,n),u(U.$$.fragment,n),p=!0)},o(n){m(s.$$.fragment,n),m(U.$$.fragment,n),p=!1},d(n){n&&e(i),d(s,n),d(U,n)}}}function uJ(A){let s,i,U,p,n,J,y,Q='<a href="https://www.deepspeed.ai/" rel="nofollow">DeepSpeed</a>는 분산 학습 메모리를 효율적이고 빠르게 만드는 PyTorch 최적화 라이브러리입니다. 그 핵심은 대규모 모델을 규모에 맞게 훈련할 수 있는 <a href="https://hf.co/papers/1910.02054" rel="nofollow">Zero Redundancy Optimizer(ZeRO)</a>입니다. ZeRO는 여러 단계로 작동합니다:',f,R,E="<li>ZeRO-1, GPU 간 최적화 상태 분할</li> <li>ZeRO-2, GPU 간 그레이디언트 분할</li> <li>ZeRO-3, GPU 간 매개변수 분할</li>",b,Z,w="GPU가 제한된 환경에서 ZeRO는 최적화 메모리와 계산을 GPU에서 CPU로 오프로드하여 단일 GPU에 대규모 모델을 장착하고 훈련할 수 있습니다. DeepSpeed는 모든 ZeRO 단계 및 오프로딩을 위해 Transformers <code>Trainer</code> 클래스와 통합되어 있습니다. 구성 파일을 제공하거나 제공된 템플릿을 사용하기만 하면 됩니다. 추론의 경우, Transformers는 대용량 모델을 가져올 수 있으므로 ZeRO-3 및 오프로딩을 지원합니다.",B,S,O="이 가이드에서는 DeepSpeed 트레이닝을 배포하는 방법, 활성화할 수 있는 기능, 다양한 ZeRO 단계에 대한 구성 파일 설정 방법, 오프로딩, 추론 및 <code>Trainer</code> 없이 DeepSpeed를 사용하는 방법을 안내해 드립니다.",_,h,D,G,Y='DeepSpeed는 PyPI 또는 Transformers에서 설치할 수 있습니다(자세한 설치 옵션은 DeepSpeed <a href="https://www.deepspeed.ai/tutorials/advanced-install/" rel="nofollow">설치 상세사항</a> 또는 GitHub <a href="https://github.com/microsoft/deepspeed#installation" rel="nofollow">README</a>를 참조하세요).',L,H,X,K,ll,q,el,v,P='시작하기 전에 모델에 맞는 충분한 GPU 및 CPU 메모리가 있는지 확인하는 것이 좋습니다. DeepSpeed는 필요한 CPU/GPU 메모리를 추정할 수 있는 도구를 제공합니다. 예를 들어, 단일 GPU에서 <a href="bigscience/T0_3B">bigscience/T0_3B</a> 모델의 메모리 요구 사항을 추정할 수 있습니다:',$,F,sl,tl,Ml="즉, CPU 오프로드가 없는 단일 80GB GPU 또는 오프로드 할 8GB GPU와 최대 60GB CPU가 필요합니다 (이는 매개변수, 최적화 상태 및 그레이디언트에 대한 메모리 요구 사항일 뿐이며 CUDA 커널 및 활성화에는 조금 더 필요합니다). 또한 더 작은 GPU를 대여하거나 구입하는 것이 더 저렴하지만 모델을 훈련하는 데 시간이 더 오래 걸리므로 비용과 속도 간의 균형을 고려해야 합니다.",nl,x,ml="GPU 메모리가 충분하다면 CPU/NVMe 오프로드를 비활성화하여 모든 작업을 더 빠르게 처리하세요.",V,N,dl,Ul,HM="DeepSpeed를 설치하고 메모리 요구 사항을 더 잘 파악했다면 다음 단계는 사용할 ZeRO 스테이지를 선택하는 것입니다. 가장 빠르고 메모리 효율이 높은 순서대로 정렬하면 다음과 같습니다:",Rt,fl,xM="<thead><tr><th>속도</th> <th>메모리 효율</th></tr></thead> <tbody><tr><td>ZeRO-1</td> <td>ZeRO-3 + offload</td></tr> <tr><td>ZeRO-2</td> <td>ZeRO-3</td></tr> <tr><td>ZeRO-2 + offload</td> <td>ZeRO-2 + offload</td></tr> <tr><td>ZeRO-3</td> <td>ZeRO-2</td></tr> <tr><td>ZeRO-3 + offload</td> <td>ZeRO-1</td></tr></tbody>",Bt,Vl,YM="자신에게 가장 적합한 방법을 찾으려면 가장 빠른 방법부터 시작하고 메모리가 부족하면 더 느리지만 메모리 효율이 높은 다음 단계를 시도하세요. 속도와 메모리 사용량 사이의 적절한 균형을 찾기 위해 (가장 메모리 효율적이거나 가장 빠른 것부터 시작하여) 원하는 방향으로 자유롭게 작업하세요.",_t,Ql,LM="일반적으로 사용할 수 있는 프로세스는 다음과 같습니다(배치 크기 1로 시작):",gt,Al,PM="<li>그레이디언트 체크포인팅 활성화</li> <li>ZeRO-2 시도</li> <li>ZeRO-2와 매개변수 오프로드 시도</li> <li>ZeRO-3 시도</li> <li>ZeRO-3과 매개변수 CPU 오프로드 시도</li> <li>ZeRO-3, 매개변수와 옵티마이저 CPU 오프로드 시도</li> <li><code>generate()</code> 메소드를 사용하는 경우 더 좁은 빔 서치 검색 범위와 같은 다양한 기본값을 낮춰보기</li> <li>전체 정밀도 가중치보다 반정밀도(구형 GPU 구조의 경우 fp16, 암페어 이후 GPU의 경우 bf16)를 혼합해보기</li> <li>가능하면 하드웨어를 더 추가하거나 Infinity가 매개변수와 옵티마이저를 NVMe로 오프로드하도록 활성화</li> <li>메모리가 부족하지 않으면 유효 처리량을 측정한 다음 배치 크기를 최대한 크게 늘려 GPU 효율성을 극대화</li> <li>마지막으로 일부 오프로드 기능을 비활성화하거나 더 빠른 ZeRO 스테이지를 사용하고 배치 크기를 늘리거나 줄여 속도와 메모리 사용량 간의 최적의 균형을 찾아 트레이닝 설정을 최적화</li>",bt,$l,Et,hl,KM="DeepSpeed는 트레이닝 실행 방법을 구성하는 모든 매개변수가 포함된 구성 파일을 통해 <code>Trainer</code> 클래스와 함께 작동합니다. 트레이닝 스크립트를 실행하면 DeepSpeed는 <code>Trainer</code>로부터 받은 구성을 콘솔에 기록하므로 어떤 구성이 사용되었는지 정확히 확인할 수 있습니다.",Zt,Tl,Nt,Rl,lU="명령줄 인터페이스에서 트레이닝하는 경우 DeepSpeed 구성 파일은 JSON 파일의 경로로 전달되거나 노트북 설정에서 <code>Trainer</code>를 사용하는 경우 중첩된 <code>dict</code> 객체로 전달됩니다.",Ft,pl,St,Bl,kt,_l,eU="구성 매개변수에는 세 가지 유형이 있습니다:",Ot,gl,tU="<li><p>일부 구성 매개변수는 <code>Trainer</code>와 DeepSpeed가 공유하며, 정의가 충돌하는 경우 오류를 식별하기 어려울 수 있습니다. 이러한 공유 구성 매개변수는 <code>Trainer</code> 명령줄 인수에서 쉽게 설정할 수 있습니다.</p></li> <li><p>모델 설정에서 자동으로 도출되는 일부 설정 매개변수는 수동으로 값을 조정할 필요가 없습니다. <code>Trainer</code>는 구성 값 <code>auto</code>를 사용하여 가장 정확하거나 효율적인 값을 설정합니다. 직접 구성 매개변수를 명시적으로 설정할 수도 있지만, <code>Trainer</code> 인수와 DeepSpeed 설정 매개변수가 일치하도록 주의해야 합니다. 일치하지 않으면 감지하기 매우 어려운 방식으로 훈련이 실패할 수 있습니다!</p></li> <li><p>교육 요구 사항에 따라 수동으로 설정해야 하는 일부 설정 매개변수는 DeepSpeed에만 해당됩니다.</p></li>",Dt,bl,sU="DeepSpeed 구성을 수정하고 <code>TrainingArguments</code>를 편집할 수도 있습니다:",Wt,El,MU="<li>기본 구성으로 사용할 DeepSpeed 구성 파일을 생성하거나 로드합니다.</li> <li>다음 DeepSpeed 구성을 기반으로 <code>TrainingArguments</code> 객체를 생성합니다.</li>",qt,Zl,UU="<code>scheduler.params.total_num_steps</code>와 같은 일부 값은 트레이닝 중 <code>Trainer</code>에 의해 계산됩니다.",vt,Nl,Gt,Fl,nU='세 가지 구성이 있으며, 각 구성은 서로 다른 ZeRO 단계에 해당합니다. 1단계는 확장성 측면에서 그다지 눈여겨볼만하지 않으므로 이 가이드에서는 2단계와 3단계에 중점을 둡니다. <code>zero_optimization</code> 구성에는 활성화할 항목과 구성 방법에 대한 모든 옵션이 포함되어 있습니다. 각 매개변수에 대한 자세한 설명은 <a href="https://www.deepspeed.ai/docs/config-json/" rel="nofollow">DeepSpeed 구성 JSON</a> 참조를 참조하세요.',Xt,yl,zt,Sl,JU="<code>Trainer</code>는 동등한 명령줄 인수를 제공하지 않으므로 다음 구성은 DeepSpeed로 설정해야 합니다.",Ht,il,xt,kl,Yt,Ol,aU='<a href="https://hf.co/papers/2104.07857" rel="nofollow">ZeRO-Infinity</a>를 사용하면 모델 상태를 CPU 및/또는 NVMe로 오프로드하여 더 많은 메모리를 절약할 수 있습니다. 스마트 파티셔닝 및 타일링 알고리즘을 통해 각 GPU는 오프로딩 중에 매우 적은 양의 데이터를 주고받을 수 있으므로 최신 NVMe는 훈련 프로세스에 사용할 수 있는 것보다 훨씬 더 큰 총 메모리 풀에 맞출 수 있습니다. ZeRO-Infinity에는 ZeRO-3가 필요합니다.',Lt,Dl,TU='사용 가능한 CPU 및/또는 NVMe 메모리에 따라 <a href="https://www.deepspeed.ai/docs/config-json/#optimizer-offloading" rel="nofollow">옵티마이저</a>와 <a href="https://www.deepspeed.ai/docs/config-json/#parameter-offloading" rel="nofollow">매개변수</a> 중 하나만 오프로드하거나 아무것도 오프로드하지 않을 수 있습니다. 또한 일반 하드 드라이브나 솔리드 스테이트 드라이브에서도 작동하지만 속도가 현저히 느려지므로 <code>nvme_path</code>가 NVMe 장치를 가리키고 있는지 확인해야 합니다. 최신 NVMe를 사용하면 읽기 작업의 경우 최대 3.5GB/s, 쓰기 작업의 경우 최대 3GB/s의 전송 속도를 기대할 수 있습니다. 마지막으로, 트레이닝 설정에서 <a href="https://github.com/microsoft/DeepSpeed/issues/998" rel="nofollow">벤치마크 실행하기</a>을 통해 최적의 ‘aio’ 구성을 결정합니다.',Pt,Wl,pU="아래 예제 ZeRO-3/Infinity 구성 파일은 대부분의 매개변수 값을 <code>auto</code>으로 설정하고 있지만, 수동으로 값을 추가할 수도 있습니다.",Kt,ql,ls,vl,es,Gl,yU="이 섹션에서 간략하게 설명하는 몇 가지 중요한 매개변수를 DeepSpeed 구성 파일에 지정할 수 있습니다.",ts,Xl,ss,zl,iU="활성화 및 그레이디언트 체크포인팅은 속도를 더 많은 GPU 메모리와 교환하여 GPU 메모리가 부족한 상황을 극복하거나 배치 크기를 늘려 성능을 향상시킬 수 있습니다. 이 기능을 활성화하려면 다음과 같이 하세요:",Ms,Hl,oU='<li>허깅 페이스 모델의 경우, <code>Trainer</code>에서 <code>model.gradient_checkpointing_enable()</code> 또는 <code>--gradient_checkpointing</code>을 설정합니다.</li> <li>허깅 페이스가 아닌 모델의 경우, 딥스피드 <a href="https://deepspeed.readthedocs.io/en/latest/activation-checkpointing.html" rel="nofollow">Activation Checkpointing API</a>를 사용합니다. 트랜스포머 모델링 코드를 대체하고 <code>torch.utils.checkpoint</code>를 DeepSpeed API로 대체할 수도 있습니다. 이 접근 방식은 순방향 활성화를 다시 계산하는 대신 CPU 메모리로 오프로드할 수 있으므로 더 유연합니다.</li>',Us,xl,ns,Yl,jU="<code>offload_optimizer</code>를 활성화하지 않는 한 DeepSpeed와 트랜스포머 옵티마이저 및 스케줄러를 혼합하여 사용할 수 있습니다. <code>offload_optimizer</code>를 활성화하면 CPU와 GPU 구현이 모두 있는 경우 DeepSpeed가 아닌 최적화기(LAMB 제외)를 사용할 수 있습니다.",Js,ol,as,jl,Ts,Ll,ps,Pl,CU="DeepSpeed는 fp32, fp16 및 bf16 혼합 정밀도를 지원합니다.",ys,Cl,is,Kl,os,le,wU="배치 크기는 자동으로 구성하거나 명시적으로 설정할 수 있습니다. <code>"auto"</code> 옵션을 사용하도록 선택하면 <code>Trainer</code>는 <code>train_micro_batch_size_per_gpu</code>를 args.<code>per_device_train_batch_size</code>의 값으로, <code>train_batch_size</code>를 <code>args.world_size * args.per_device_train_batch_size * args.gradient_accumulation_steps</code>로 설정합니다.",js,ee,Cs,te,ws,se,rU="그레이디언트 누적을 자동으로 구성하거나 명시적으로 설정할 수 있습니다. <code>"auto"</code> 옵션을 사용하도록 선택하면 <code>Trainer</code>가 <code>args.gradient_accumulation_steps</code>의 값으로 설정합니다.",rs,Me,Is,Ue,cs,ne,IU="그레이디언트 클리핑은 자동으로 구성하거나 명시적으로 설정할 수 있습니다. <code>"auto"</code> 옵션을 사용하도록 선택하면 <code>Trainer</code>가 <code>args.max_grad_norm</code>의 값으로 설정합니다.",us,Je,ms,ae,ds,Te,cU="축소, 수집 및 분산 작업과 같은 통신 집합체의 경우 별도의 데이터 유형이 사용됩니다.",fs,pe,uU="모든 수집 및 분산 작업은 데이터와 동일한 데이터 유형으로 수행됩니다. 예를 들어 bf16으로 훈련하는 경우, 수집은 비손실 연산이므로 데이터도 bf16으로 수집됩니다.",Vs,ye,mU="예를 들어 그레이디언트가 여러 GPU에 걸쳐 평균화되는 경우와 같이 감소 연산은 손실이 발생합니다. 통신이 fp16 또는 bf16으로 수행되는 경우, 낮은 정밀도로 여러 숫자를 더하면 정확하지 않기 때문에 손실이 발생할 가능성이 더 높습니다. 특히 fp16보다 정밀도가 낮은 bf16의 경우 더욱 그렇습니다. 이러한 이유로 기울기를 평균화할 때 손실이 최소화되므로 감소 연산에는 fp16이 기본값으로 사용됩니다.",Qs,ie,dU="통신 데이터 유형은 설정 파일에서 <code>communication_data_type</code> 매개변수를 설정하여 선택할 수 있습니다. 예를 들어, fp32를 선택하면 약간의 오버헤드가 추가되지만 감소 연산이 fp32에 누적되고 준비가 되면 훈련 중인 반정밀 dtype으로 다운캐스트됩니다.",As,oe,$s,je,hs,Ce,fU='<a href="https://pytorch.org/docs/stable/elastic/run.html" rel="nofollow">torchrun</a>, <code>deepspeed</code> 런처 또는 <a href="https://huggingface.co/docs/accelerate/basic_tutorials/launch#using-accelerate-launch" rel="nofollow">Accelerate</a> 등 다양한 런처를 통해 DeepSpeed를 배포할 수 있습니다. 배포하려면 <code>Trainer</code> 명령줄에 <code>--deepspeed ds_config.json</code>을 추가합니다. 필요한 명령줄 인수를 코드에 추가하려면 DeepSpeed의 <a href="https://deepspeed.readthedocs.io/en/latest/initialize.html#argument-parsing" rel="nofollow"><code>add_config_arguments</code></a> 유틸리티를 사용하는 것이 좋습니다.',Rs,we,VU='이 가이드에서는 다양한 트레이닝 설정에 대해 <code>deepspeed</code> 런처로 DeepSpeed를 배포하는 방법을 보여드립니다. 보다 실용적인 사용 예제는 이 <a href="https://github.com/huggingface/transformers/issues/8771#issuecomment-759248400" rel="nofollow">post</a>에서 확인할 수 있습니다.',Bs,wl,_s,re,gs,Ie,QU="노드는 워크로드를 실행하기 위한 하나 이상의 GPU입니다. 더 강력한 설정은 멀티 노드 설정으로, <code>deepspeed</code> 런처로 실행할 수 있습니다. 이 가이드에서는 각각 8개의 GPU가 있는 두 개의 노드가 있다고 가정해 보겠습니다. 첫 번째 노드는 <code>ssh hostname1</code>로, 두 번째 노드는 <code>ssh hostname2</code>로 접속할 수 있습니다. 두 노드 모두 비밀번호 없이 ssh를 통해 로컬로 서로 통신할 수 있어야 합니다.",bs,ce,AU='기본적으로 DeepSpeed는 멀티노드 환경에서 공유 저장소를 사용할 것으로 예상합니다. 그렇지 않고 각 노드가 로컬 파일 시스템만 볼 수 있는 경우, 공유 파일 시스템에 대한 액세스 없이 로딩할 수 있도록 <a href="https://www.deepspeed.ai/docs/config-json/#checkpoint-options" rel="nofollow"><code>checkpoint</code></a>를 포함하도록 구성 파일을 조정해야 합니다:',Es,ue,Zs,me,$U="<code>Trainer</code>의 `<code>--save_on_each_node</code> 인수를 사용하여 위의 <code>checkpoint</code>를 구성에 자동으로 추가할 수도 있습니다.",Ns,rl,Fs,de,Ss,fe,hU="SLURM 환경에서는 특정 SLURM 환경에 맞게 SLURM 스크립트를 조정해야 합니다.SLURM 스크립트 예시는 다음과 같습니다:",ks,Ve,Os,Qe,RU="그런 다음 모든 노드에서 동시에 학습을 시작하는 다음 명령을 사용하여 다중 노드 배포를 예약할 수 있습니다.",Ds,Ae,Ws,$e,qs,he,BU="<code>deepspeed</code> 런처는 노트북에서의 배포를 지원하지 않으므로 분산 환경을 에뮬레이션해야 합니다. 하지만 이는 1개의 GPU에서만 작동합니다. 1개 이상의 GPU를 사용하려면 딥스피드가 작동할 수 있는 다중 프로세스 환경을 사용해야 합니다. 즉, 여기에 표시된 것처럼 에뮬레이션할 수 없는 <code>deepspeed</code> 런처를 사용해야 합니다.",vs,Re,Gs,Be,_U="현재 디렉터리의 노트북에 구성 파일을 즉석에서 만들고 싶다면 전용 셀을 만들 수 있습니다.",Xs,_e,zs,ge,gU="트레이닝 스크립트가 노트북 셀이 아닌 파일에 있는 경우, 노트북 셀의 셸에서 <code>deepspeed</code>를 정상적으로 실행할 수 있습니다. 예를 들어 <code>run_translation.py</code>를 시작하려면 다음과 같이 하세요.:",Hs,be,xs,Ee,bU="또한 <code>%%bash</code> 매직을 사용하여 여러 줄의 코드를 작성하여 셸 프로그램을 실행할 수도 있지만 교육이 완료될 때까지 로그를 볼 수 없습니다. <code>%%bash</code> 매직으로 분산 환경을 에뮬레이션할 필요는 없습니다.",Ys,Ze,Ls,Ne,Ps,Fe,EU="딥스피드는 기본 고정밀 fp32 가중치를 사용자 지정 체크포인트 최적화 파일(glob 패턴은 <code>global_step*/*optim_states.pt</code>처럼 보입니다)에 저장하고 일반 체크포인트 아래에 저장합니다.",Ks,Il,lM,Se,eM,ke,ZU='<a href="https://www.deepspeed.ai/2022/09/09/zero-inference.html" rel="nofollow">ZeRO Inference</a>는 모델 가중치를 CPU 또는 NVMe 메모리에 배치하여 GPU에 부담을 주지 않으므로 GPU에서 대규모 모델을 사용하여 추론을 실행할 수 있습니다. 추론은 최적화 상태 및 그레이디언트에 많은 양의 메모리를 추가로 필요로 하지 않으므로 동일한 하드웨어에 훨씬 더 큰 배치 및/또는 시퀀스 길이를 맞출 수 있습니다.',tM,Oe,NU='ZeRO Inference는 <a href="#zero-configuration">ZeRO-3</a>와 동일한 구성 파일을 공유하며, ZeRO-2 및 ZeRO-1 구성은 추론에 아무런 이점을 제공하지 않으므로 작동하지 않습니다.',sM,De,FU="ZeRO Inference를 실행하려면 일반적인 훈련 인수를 <code>TrainingArguments</code> 클래스에 전달하고 <code>--do_eval</code> 인수를 추가합니다.",MM,We,UM,qe,nM,ve,SU="DeepSpeed는 <code>Trainer</code> 클래스가 없는 트랜스포머에서도 작동합니다. 이는 <code>from_pretrained()</code>를 호출할 때 ZeRO-3 매개변수를 수집하고 모델을 여러 GPU에 분할하는 작업만 처리하는 <code>HfDeepSpeedConfig</code>가 처리합니다.",JM,cl,aM,Ge,kU="ZeRO-3를 효율적으로 배포하려면 모델 앞에 <code>HfDeepSpeedConfig</code> 객체를 인스턴스화하고 해당 객체를 유지해야 합니다:",TM,ul,pM,Xe,yM,ze,OU="단일 GPU에 모델을 맞출 수 없는 경우 <code>Trainer</code>없이 ZeRO 추론을 실행하려면 추가 GPU를 사용하거나 CPU 메모리로 오프로드를 시도하세요. 여기서 이해해야 할 중요한 뉘앙스는 ZeRO가 설계된 방식에 따라 서로 다른 GPU에서 서로 다른 입력을 병렬로 처리할 수 있다는 것입니다.",iM,He,DU="반드시 확인하세요:",oM,xe,WU="<li>GPU 메모리가 충분한 경우 CPU 오프로드를 비활성화합니다(속도가 느려지므로).</li> <li>Ampere 이상의 GPU를 사용하는 경우 bf16을 활성화하면 속도가 빨라집니다. 이러한 GPU가 없는 경우 오버플로 오류가 발생할 수 있으므로 bf16으로 사전 학습된 모델(T5 모델)을 사용하지 않는 한 fp16을 활성화할 수 있습니다.</li>",jM,Ye,qU="단일 GPU에 맞지 않는 모델에서 <code>Trainer</code> 없이 ZeRO 추론을 실행하는 방법에 대한 더 나은 아이디어를 얻으려면 다음 스크립트를 살펴보시기 바랍니다.",CM,Le,wM,Pe,vU="스크립트를 t0.py로 저장하고 실행합니다:",rM,Ke,IM,lt,GU="이것은 매우 기본적인 예시이므로 사용 사례에 맞게 조정할 수 있습니다.",cM,et,uM,tt,XU="생성에 ZeRO-3와 함께 여러 개의 GPU를 사용하려면 <code>generate()</code> 메서드에서 <code>synced_gpus=True</code>를 설정하여 GPU를 동기화해야 합니다. 그렇지 않으면 한 GPU가 다른 GPU보다 먼저 생성을 완료하면 나머지 GPU가 먼저 완료한 GPU로부터 가중치 샤드를 받지 못하여 전체 시스템이 중단됩니다.",mM,st,zU="트랜스포머>=4.28의 경우, 생성 중에 여러 개의 GPU가 감지되면 <code>synced_gpus</code>가 자동으로 <code>True</code>로 설정됩니다.",dM,Mt,fM,Ut,HU='문제가 발생하면 DeepSpeed가 문제의 원인이 아닌 경우가 많으므로(아주 명백하고 예외적으로 DeepSpeed 모듈을 볼 수 있는 경우가 아니라면) DeepSpeed가 문제의 원인인지 고려해야 합니다! 첫 번째 단계는 DeepSpeed 없이 설정을 다시 시도하고 문제가 지속되면 문제를 신고하는 것입니다. 문제가 핵심적인 DeepSpeed 문제이고 transformers와 관련이 없는 경우, <a href="https://github.com/microsoft/DeepSpeed" rel="nofollow">DeepSpeed 리포지토리</a>에서 이슈를 개설하세요.',VM,nt,xU="transformers와 관련된 이슈를 개설할 때에는 다음 정보를 제공해 주세요:",QM,Jt,YU="<li>전체 DeepSpeed 구성 파일</li>",AM,at,LU="*<code>Trainer</code>의 명령줄 인수, 또는<code>Trainer</code> 설정을 직접 작성하는 경우<code>TrainingArguments</code> 인수(관련 없는 항목이 수십 개 있는 <code>TrainingArguments</code>는 덤프하지 마세요).",$M,Tt,PU="<li>다음 코드의 출력 결과:</li>",hM,pt,RM,yt,KU="<li><p>문제를 재현할 수 있는 Google Colab 노트북 링크</p></li> <li><p>불가능할 경우 기존 예제를 사용하여 문제를 재현할 수 있는 표준 및 사용자 지정이 아닌 데이터 집합을 사용할 수 있습니다.</p></li>",BM,it,ln="다음 섹션에서는 가장 일반적인 두 가지 문제를 해결하기 위한 가이드를 제공합니다.",_M,ot,gM,jt,en="실행 중에 트레이스백 없이 DeepSpeed 프로세스가 종료되면 일반적으로 프로그램이 시스템보다 많은 CPU 메모리를 할당하려고 시도했거나 프로세스가 허용된 것보다 많은 CPU 메모리를 할당하려고 시도하여 OS 커널이 프로세스를 종료했음을 의미합니다. 이 경우 구성 파일에 <code>offload_optimizer</code>, <code>offload_param</code> 또는 둘 다 CPU로 오프로드하도록 구성되어 있는지 확인하세요.",bM,Ct,tn='NVMe 및 ZeRO-3를 설정한 경우 NVMe로 오프로드를 실험해 보세요(모델의 메모리 요구 사항을 <a href="https://deepspeed.readthedocs.io/en/latest/memory.html" rel="nofollow">확인</a>하세요).',EM,wt,ZM,rt,sn="모델을 bf16으로 사전 훈련한 다음 fp16으로 사용하려고 할 때 NaN 손실이 발생하는 경우가 많습니다(특히 TPU 훈련 모델에 해당). 이 문제를 해결하려면 하드웨어가 이를 지원하는 경우(TPU, Ampere GPU 이상) fp32 또는 bf16을 사용하세요.",NM,It,Mn="다른 문제는 fp16 사용과 관련이 있을 수 있습니다. 예를 들어 이것이 fp16 구성인 경우입니다:",FM,ct,SM,ut,Un="로그에 다음과 같은 <code>OVERFLOW!</code> 메시지가 표시될 수 있습니다:",kM,mt,OM,dt,nn="이는 DeepSpeed 손실 스케일러가 손실 오버플로를 극복할 수 있는 스케일링 계수를 찾을 수 없음을 의미합니다. 이 문제를 해결하려면 <code>initial_scale_power</code> 값을 더 높게 설정하세요(일반적으로 32가 적절합니다).",DM,ft,WM,Vt,Jn='DeepSpeed ZeRO는 제한된 GPU 리소스로 추론을 위해 매우 큰 모델을 훈련하고 로드하는 강력한 기술로, 누구나 쉽게 사용할 수 있습니다. DeepSpeed에 대해 자세히 알아보려면 <a href="https://www.microsoft.com/en-us/research/search/?q=deepspeed" rel="nofollow">블로그 포스트</a>, <a href="https://www.deepspeed.ai/getting-started/" rel="nofollow">공식 문서</a>, <a href="https://github.com/microsoft/deepspeed" rel="nofollow">깃허브 리포지토리</a>를 참조하세요.',qM,Qt,an="다음 문서도 ZeRO에 대해 자세히 알아볼 수 있는 훌륭한 자료입니다:",vM,At,Tn='<li><a href="https://hf.co/papers/1910.02054" rel="nofollow">ZeRO: Memory Optimizations Toward Training Trillion Parameter Models</a></li> <li><a href="https://hf.co/papers/2101.06840" rel="nofollow">ZeRO-Offload: Democratizing Billion-Scale Model Training</a></li> <li><a href="https://hf.co/papers/2104.07857" rel="nofollow">ZeRO-Infinity: Breaking the GPU Memory Wall for Extreme Scale Deep Learning</a></li>',GM,$t,XM,ht,zM;return n=new k({props:{title:"DeepSpeed",local:"deepspeed",headingTag:"h1"}}),h=new k({props:{title:"설치",local:"installation",headingTag:"h2"}}),H=new al({props:{$$slots:{default:[Nn]},$$scope:{ctx:A}}}),K=new Jl({props:{id:"install",options:["PyPI","Transformers"],$$slots:{default:[kn]},$$scope:{ctx:A}}}),q=new k({props:{title:"메모리 요구량",local:"memory-requirements",headingTag:"h2"}}),F=new g({props:{code:"JTI0JTIwcHl0aG9uJTIwLWMlMjAnZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbCUzQiUyMCU1QyUwQWZyb20lMjBkZWVwc3BlZWQucnVudGltZS56ZXJvLnN0YWdlMyUyMGltcG9ydCUyMGVzdGltYXRlX3plcm8zX21vZGVsX3N0YXRlc19tZW1fbmVlZHNfYWxsX2xpdmUlM0IlMjAlNUMlMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbC5mcm9tX3ByZXRyYWluZWQoJTIyYmlnc2NpZW5jZSUyRlQwXzNCJTIyKSUzQiUyMCU1QyUwQWVzdGltYXRlX3plcm8zX21vZGVsX3N0YXRlc19tZW1fbmVlZHNfYWxsX2xpdmUobW9kZWwlMkMlMjBudW1fZ3B1c19wZXJfbm9kZSUzRDElMkMlMjBudW1fbm9kZXMlM0QxKSclMEElNUIuLi4lNUQlMEFFc3RpbWF0ZWQlMjBtZW1vcnklMjBuZWVkZWQlMjBmb3IlMjBwYXJhbXMlMkMlMjBvcHRpbSUyMHN0YXRlcyUyMGFuZCUyMGdyYWRpZW50cyUyMGZvciUyMGElM0ElMEFIVyUzQSUyMFNldHVwJTIwd2l0aCUyMDElMjBub2RlJTJDJTIwMSUyMEdQVSUyMHBlciUyMG5vZGUuJTBBU1clM0ElMjBNb2RlbCUyMHdpdGglMjAyNzgzTSUyMHRvdGFsJTIwcGFyYW1zJTJDJTIwNjVNJTIwbGFyZ2VzdCUyMGxheWVyJTIwcGFyYW1zLiUwQSUyMCUyMHBlciUyMENQVSUyMCUyMCU3QyUyMCUyMHBlciUyMEdQVSUyMCU3QyUyMCUyMCUyME9wdGlvbnMlMEElMjAlMjAlMjA3MC4wMEdCJTIwJTdDJTIwJTIwJTIwMC4yNUdCJTIwJTdDJTIwb2ZmbG9hZF9wYXJhbSUzRGNwdSUyMCUyQyUyMG9mZmxvYWRfb3B0aW1pemVyJTNEY3B1JTIwJTJDJTIwemVyb19pbml0JTNEMSUwQSUyMCUyMCUyMDcwLjAwR0IlMjAlN0MlMjAlMjAlMjAwLjI1R0IlMjAlN0MlMjBvZmZsb2FkX3BhcmFtJTNEY3B1JTIwJTJDJTIwb2ZmbG9hZF9vcHRpbWl6ZXIlM0RjcHUlMjAlMkMlMjB6ZXJvX2luaXQlM0QwJTBBJTIwJTIwJTIwNjIuMjNHQiUyMCU3QyUyMCUyMCUyMDUuNDNHQiUyMCU3QyUyMG9mZmxvYWRfcGFyYW0lM0Rub25lJTJDJTIwb2ZmbG9hZF9vcHRpbWl6ZXIlM0RjcHUlMjAlMkMlMjB6ZXJvX2luaXQlM0QxJTBBJTIwJTIwJTIwNjIuMjNHQiUyMCU3QyUyMCUyMCUyMDUuNDNHQiUyMCU3QyUyMG9mZmxvYWRfcGFyYW0lM0Rub25lJTJDJTIwb2ZmbG9hZF9vcHRpbWl6ZXIlM0RjcHUlMjAlMkMlMjB6ZXJvX2luaXQlM0QwJTBBJTIwJTIwJTIwJTIwMC4zN0dCJTIwJTdDJTIwJTIwNDYuOTFHQiUyMCU3QyUyMG9mZmxvYWRfcGFyYW0lM0Rub25lJTJDJTIwb2ZmbG9hZF9vcHRpbWl6ZXIlM0Rub25lJTJDJTIwemVyb19pbml0JTNEMSUwQSUyMCUyMCUyMDE1LjU2R0IlMjAlN0MlMjAlMjA0Ni45MUdCJTIwJTdDJTIwb2ZmbG9hZF9wYXJhbSUzRG5vbmUlMkMlMjBvZmZsb2FkX29wdGltaXplciUzRG5vbmUlMkMlMjB6ZXJvX2luaXQlM0Qw",highlighted:`$ python -c <span class="hljs-string">'from transformers import AutoModel; \\ | |
| from deepspeed.runtime.zero.stage3 import estimate_zero3_model_states_mem_needs_all_live; \\ | |
| model = AutoModel.from_pretrained("bigscience/T0_3B"); \\ | |
| estimate_zero3_model_states_mem_needs_all_live(model, num_gpus_per_node=1, num_nodes=1)'</span> | |
| [...] | |
| Estimated memory needed <span class="hljs-keyword">for</span> params, optim states and gradients <span class="hljs-keyword">for</span> a: | |
| HW: Setup with 1 node, 1 GPU per node. | |
| SW: Model with 2783M total params, 65M largest layer params. | |
| per CPU | per GPU | Options | |
| 70.00GB | 0.25GB | offload_param=cpu , offload_optimizer=cpu , zero_init=1 | |
| 70.00GB | 0.25GB | offload_param=cpu , offload_optimizer=cpu , zero_init=0 | |
| 62.23GB | 5.43GB | offload_param=none, offload_optimizer=cpu , zero_init=1 | |
| 62.23GB | 5.43GB | offload_param=none, offload_optimizer=cpu , zero_init=0 | |
| 0.37GB | 46.91GB | offload_param=none, offload_optimizer=none, zero_init=1 | |
| 15.56GB | 46.91GB | offload_param=none, offload_optimizer=none, zero_init=0`,wrap:!1}}),N=new k({props:{title:"ZeRO 단계 설정하기",local:"select-a-zero-stage",headingTag:"h2"}}),$l=new k({props:{title:"DeepSpeed 구성 파일",local:"deepspeed-configuration-file",headingTag:"h2"}}),Tl=new al({props:{$$slots:{default:[On]},$$scope:{ctx:A}}}),pl=new Jl({props:{id:"pass-config",options:["path to file","nested dict"],$$slots:{default:[qn]},$$scope:{ctx:A}}}),Bl=new k({props:{title:"DeepSpeed와 Trainer 매개변수",local:"deepspeed-and-trainer-parameters",headingTag:"h3"}}),Nl=new k({props:{title:"ZeRO 구성",local:"zero-configuration",headingTag:"h3"}}),yl=new al({props:{warning:!0,$$slots:{default:[vn]},$$scope:{ctx:A}}}),il=new Jl({props:{id:"zero-config",options:["ZeRO-1","ZeRO-2","ZeRO-3"],$$slots:{default:[xn]},$$scope:{ctx:A}}}),kl=new k({props:{title:"NVMe 설정",local:"nvme-configuration",headingTag:"h3"}}),ql=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyZnAxNiUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmVuYWJsZWQlMjIlM0ElMjAlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybG9zc19zY2FsZSUyMiUzQSUyMDAlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJsb3NzX3NjYWxlX3dpbmRvdyUyMiUzQSUyMDEwMDAlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJpbml0aWFsX3NjYWxlX3Bvd2VyJTIyJTNBJTIwMTYlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJoeXN0ZXJlc2lzJTIyJTNBJTIwMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm1pbl9sb3NzX3NjYWxlJTIyJTNBJTIwMSUwQSUyMCUyMCUyMCUyMCU3RCUyQyUwQSUwQSUyMCUyMCUyMCUyMCUyMm9wdGltaXplciUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnR5cGUlMjIlM0ElMjAlMjJBZGFtVyUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnBhcmFtcyUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmxyJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmJldGFzJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmVwcyUyMiUzQSUyMCUyMmF1dG8lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ3ZWlnaHRfZGVjYXklMjIlM0ElMjAlMjJhdXRvJTIyJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTBBJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTBBJTIwJTIwJTIwJTIwJTIyc2NoZWR1bGVyJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydHlwZSUyMiUzQSUyMCUyMldhcm11cExSJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycGFyYW1zJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyd2FybXVwX21pbl9sciUyMiUzQSUyMCUyMmF1dG8lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ3YXJtdXBfbWF4X2xyJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMndhcm11cF9udW1fc3RlcHMlMjIlM0ElMjAlMjJhdXRvJTIyJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdEJTBBJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTBBJTIwJTIwJTIwJTIwJTIyemVyb19vcHRpbWl6YXRpb24lMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFnZSUyMiUzQSUyMDMlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJvZmZsb2FkX29wdGltaXplciUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmRldmljZSUyMiUzQSUyMCUyMm52bWUlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJudm1lX3BhdGglMjIlM0ElMjAlMjIlMkZsb2NhbF9udm1lJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycGluX21lbW9yeSUyMiUzQSUyMHRydWUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJidWZmZXJfY291bnQlMjIlM0ElMjA0JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZmFzdF9pbml0JTIyJTNBJTIwZmFsc2UlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJvZmZsb2FkX3BhcmFtJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZGV2aWNlJTIyJTNBJTIwJTIybnZtZSUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm52bWVfcGF0aCUyMiUzQSUyMCUyMiUyRmxvY2FsX252bWUlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJwaW5fbWVtb3J5JTIyJTNBJTIwdHJ1ZSUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmJ1ZmZlcl9jb3VudCUyMiUzQSUyMDUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJidWZmZXJfc2l6ZSUyMiUzQSUyMDFlOCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm1heF9pbl9jcHUlMjIlM0ElMjAxZTklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJhaW8lMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJibG9ja19zaXplJTIyJTNBJTIwMjYyMTQ0JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycXVldWVfZGVwdGglMjIlM0ElMjAzMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnRocmVhZF9jb3VudCUyMiUzQSUyMDElMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzaW5nbGVfc3VibWl0JTIyJTNBJTIwZmFsc2UlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJvdmVybGFwX2V2ZW50cyUyMiUzQSUyMHRydWUlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJvdmVybGFwX2NvbW0lMjIlM0ElMjB0cnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyY29udGlndW91c19ncmFkaWVudHMlMjIlM0ElMjB0cnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyc3ViX2dyb3VwX3NpemUlMjIlM0ElMjAxZTklMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJyZWR1Y2VfYnVja2V0X3NpemUlMjIlM0ElMjAlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyc3RhZ2UzX3ByZWZldGNoX2J1Y2tldF9zaXplJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnN0YWdlM19wYXJhbV9wZXJzaXN0ZW5jZV90aHJlc2hvbGQlMjIlM0ElMjAlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyc3RhZ2UzX21heF9saXZlX3BhcmFtZXRlcnMlMjIlM0ElMjAxZTklMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFnZTNfbWF4X3JldXNlX2Rpc3RhbmNlJTIyJTNBJTIwMWU5JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyc3RhZ2UzX2dhdGhlcl8xNmJpdF93ZWlnaHRzX29uX21vZGVsX3NhdmUlMjIlM0ElMjB0cnVlJTBBJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTBBJTIwJTIwJTIwJTIwJTIyZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMmdyYWRpZW50X2NsaXBwaW5nJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMnN0ZXBzX3Blcl9wcmludCUyMiUzQSUyMDIwMDAlMkMlMEElMjAlMjAlMjAlMjAlMjJ0cmFpbl9iYXRjaF9zaXplJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMnRyYWluX21pY3JvX2JhdGNoX3NpemVfcGVyX2dwdSUyMiUzQSUyMCUyMmF1dG8lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJ3YWxsX2Nsb2NrX2JyZWFrZG93biUyMiUzQSUyMGZhbHNlJTBBJTdE",highlighted:`{ | |
| <span class="hljs-attr">"fp16":</span> { | |
| <span class="hljs-attr">"enabled":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"loss_scale":</span> <span class="hljs-number">0</span>, | |
| <span class="hljs-attr">"loss_scale_window":</span> <span class="hljs-number">1000</span>, | |
| <span class="hljs-attr">"initial_scale_power":</span> <span class="hljs-number">16</span>, | |
| <span class="hljs-attr">"hysteresis":</span> <span class="hljs-number">2</span>, | |
| <span class="hljs-attr">"min_loss_scale":</span> <span class="hljs-number">1</span> | |
| }, | |
| <span class="hljs-attr">"optimizer":</span> { | |
| <span class="hljs-attr">"type":</span> <span class="hljs-string">"AdamW"</span>, | |
| <span class="hljs-attr">"params":</span> { | |
| <span class="hljs-attr">"lr":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"betas":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"eps":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"weight_decay":</span> <span class="hljs-string">"auto"</span> | |
| } | |
| }, | |
| <span class="hljs-attr">"scheduler":</span> { | |
| <span class="hljs-attr">"type":</span> <span class="hljs-string">"WarmupLR"</span>, | |
| <span class="hljs-attr">"params":</span> { | |
| <span class="hljs-attr">"warmup_min_lr":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"warmup_max_lr":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"warmup_num_steps":</span> <span class="hljs-string">"auto"</span> | |
| } | |
| }, | |
| <span class="hljs-attr">"zero_optimization":</span> { | |
| <span class="hljs-attr">"stage":</span> <span class="hljs-number">3</span>, | |
| <span class="hljs-attr">"offload_optimizer":</span> { | |
| <span class="hljs-attr">"device":</span> <span class="hljs-string">"nvme"</span>, | |
| <span class="hljs-attr">"nvme_path":</span> <span class="hljs-string">"/local_nvme"</span>, | |
| <span class="hljs-attr">"pin_memory":</span> <span class="hljs-literal">true</span>, | |
| <span class="hljs-attr">"buffer_count":</span> <span class="hljs-number">4</span>, | |
| <span class="hljs-attr">"fast_init":</span> <span class="hljs-literal">false</span> | |
| }, | |
| <span class="hljs-attr">"offload_param":</span> { | |
| <span class="hljs-attr">"device":</span> <span class="hljs-string">"nvme"</span>, | |
| <span class="hljs-attr">"nvme_path":</span> <span class="hljs-string">"/local_nvme"</span>, | |
| <span class="hljs-attr">"pin_memory":</span> <span class="hljs-literal">true</span>, | |
| <span class="hljs-attr">"buffer_count":</span> <span class="hljs-number">5</span>, | |
| <span class="hljs-attr">"buffer_size":</span> <span class="hljs-number">1e8</span>, | |
| <span class="hljs-attr">"max_in_cpu":</span> <span class="hljs-number">1e9</span> | |
| }, | |
| <span class="hljs-attr">"aio":</span> { | |
| <span class="hljs-attr">"block_size":</span> <span class="hljs-number">262144</span>, | |
| <span class="hljs-attr">"queue_depth":</span> <span class="hljs-number">32</span>, | |
| <span class="hljs-attr">"thread_count":</span> <span class="hljs-number">1</span>, | |
| <span class="hljs-attr">"single_submit":</span> <span class="hljs-literal">false</span>, | |
| <span class="hljs-attr">"overlap_events":</span> <span class="hljs-literal">true</span> | |
| }, | |
| <span class="hljs-attr">"overlap_comm":</span> <span class="hljs-literal">true</span>, | |
| <span class="hljs-attr">"contiguous_gradients":</span> <span class="hljs-literal">true</span>, | |
| <span class="hljs-attr">"sub_group_size":</span> <span class="hljs-number">1e9</span>, | |
| <span class="hljs-attr">"reduce_bucket_size":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"stage3_prefetch_bucket_size":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"stage3_param_persistence_threshold":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"stage3_max_live_parameters":</span> <span class="hljs-number">1e9</span>, | |
| <span class="hljs-attr">"stage3_max_reuse_distance":</span> <span class="hljs-number">1e9</span>, | |
| <span class="hljs-attr">"stage3_gather_16bit_weights_on_model_save":</span> <span class="hljs-literal">true</span> | |
| }, | |
| <span class="hljs-attr">"gradient_accumulation_steps":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"gradient_clipping":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"steps_per_print":</span> <span class="hljs-number">2000</span>, | |
| <span class="hljs-attr">"train_batch_size":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"train_micro_batch_size_per_gpu":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"wall_clock_breakdown":</span> <span class="hljs-literal">false</span> | |
| }`,wrap:!1}}),vl=new k({props:{title:"DeepSpeed 구성",local:"deepspeed-features",headingTag:"h2"}}),Xl=new k({props:{title:"활성화/그레이디언트 체크포인팅",local:"activationgradient-checkpointing",headingTag:"h3"}}),xl=new k({props:{title:"옵티마이저와 스케줄러",local:"optimizer-and-scheduler",headingTag:"h3"}}),ol=new al({props:{warning:!0,$$slots:{default:[Yn]},$$scope:{ctx:A}}}),jl=new Jl({props:{id:"opt-sched",options:["optimizer","scheduler"],$$slots:{default:[Kn]},$$scope:{ctx:A}}}),Ll=new k({props:{title:"정밀도",local:"precision",headingTag:"h3"}}),Cl=new Jl({props:{id:"precision",options:["fp32","fp16","bf16"],$$slots:{default:[sJ]},$$scope:{ctx:A}}}),Kl=new k({props:{title:"배치 크기",local:"batch-size",headingTag:"h3"}}),ee=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIydHJhaW5fbWljcm9fYmF0Y2hfc2l6ZV9wZXJfZ3B1JTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMnRyYWluX2JhdGNoX3NpemUlMjIlM0ElMjAlMjJhdXRvJTIyJTBBJTdE",highlighted:`{ | |
| <span class="hljs-attr">"train_micro_batch_size_per_gpu":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"train_batch_size":</span> <span class="hljs-string">"auto"</span> | |
| }`,wrap:!1}}),te=new k({props:{title:"그레이디언트 누적",local:"gradient-accumulation",headingTag:"h3"}}),Me=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTIyJTNBJTIwJTIyYXV0byUyMiUwQSU3RCUwQQ==",highlighted:`{ | |
| <span class="hljs-attr">"gradient_accumulation_steps":</span> <span class="hljs-string">"auto"</span> | |
| } | |
| `,wrap:!1}}),Ue=new k({props:{title:"그레이디언트 클리핑",local:"gradient-clipping",headingTag:"h3"}}),Je=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyZ3JhZGllbnRfY2xpcHBpbmclMjIlM0ElMjAlMjJhdXRvJTIyJTBBJTdE",highlighted:`{ | |
| <span class="hljs-attr">"gradient_clipping":</span> <span class="hljs-string">"auto"</span> | |
| }`,wrap:!1}}),ae=new k({props:{title:"통신 데이터 유형(Communication data type)",local:"communication-data-type",headingTag:"h3"}}),oe=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyY29tbXVuaWNhdGlvbl9kYXRhX3R5cGUlMjIlM0ElMjAlMjJmcDMyJTIyJTBBJTdE",highlighted:`{ | |
| <span class="hljs-attr">"communication_data_type":</span> <span class="hljs-string">"fp32"</span> | |
| }`,wrap:!1}}),je=new k({props:{title:"모델 배포",local:"deployment",headingTag:"h2"}}),wl=new Jl({props:{id:"deploy",options:["multi-GPU","single-GPU"],$$slots:{default:[JJ]},$$scope:{ctx:A}}}),re=new k({props:{title:"다중 노드 환경에서의 모델 배포",local:"multi-node-deployment",headingTag:"h3"}}),ue=new g({props:{code:"JTdCJTBBJTIwJTIwJTIyY2hlY2twb2ludCUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMnVzZV9ub2RlX2xvY2FsX3N0b3JhZ2UlMjIlM0ElMjB0cnVlJTBBJTIwJTIwJTdEJTBBJTdE",highlighted:`{ | |
| <span class="hljs-attr">"checkpoint":</span> { | |
| <span class="hljs-attr">"use_node_local_storage":</span> <span class="hljs-literal">true</span> | |
| } | |
| }`,wrap:!1}}),rl=new Jl({props:{id:"multinode",options:["torchrun","deepspeed"],$$slots:{default:[pJ]},$$scope:{ctx:A}}}),de=new k({props:{title:"SLURM",local:"slurm",headingTag:"h3"}}),Ve=new g({props:{code:"JTIzU0JBVENIJTIwLS1qb2ItbmFtZSUzRHRlc3Qtbm9kZXMlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjMlMjAlRUMlOUUlOTElRUMlOTclODUlMjAlRUMlOUQlQjQlRUIlQTYlODQlMEElMjNTQkFUQ0glMjAtLW5vZGVzJTNEMiUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMCVFQiU4NSVCOCVFQiU5MyU5QyUyMCVFQyU4OCU5OCUwQSUyM1NCQVRDSCUyMC0tbnRhc2tzLXBlci1ub2RlJTNEMSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMCVFQyVBNCU5MSVFQyU5QSU5NCUyMC0lMjAlRUIlODUlQjglRUIlOTMlOUMlRUIlOEIlQjklMjAlRUIlQjYlODQlRUMlODIlQjAlMjAlRUMlOUUlOTElRUMlOTclODUlMjAxJUVBJUIwJTlDISUwQSUyM1NCQVRDSCUyMC0tY3B1cy1wZXItdGFzayUzRDEwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwJUVDJTlFJTkxJUVDJTk3JTg1JUVCJThCJUI5JTIwQ1BVJTIwJUVDJUJEJTk0JUVDJTk2JUI0JTIwJUVDJTg4JTk4JTBBJTIzU0JBVENIJTIwLS1ncmVzJTNEZ3B1JTNBOCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMGdwdSUyMCVFQyU4OCU5OCUwQSUyM1NCQVRDSCUyMC0tdGltZSUyMDIwJTNBMDAlM0EwMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMCVFQyVCNSU5QyVFQiU4QyU4MCUyMCVFQyU4QiVBNCVFRCU5NiU4OSUyMCVFQyU4QiU5QyVFQSVCMCU4NCUyMChISCUzQU1NJTNBU1MpJTBBJTIzU0JBVENIJTIwLS1vdXRwdXQlM0QlMjV4LSUyNWoub3V0JTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwJUVDJUI2JTlDJUVCJUEwJUE1JTIwJUVEJThDJThDJUVDJTlEJUJDJTIwJUVDJTlEJUI0JUVCJUE2JTg0JTBBJTBBZXhwb3J0JTIwR1BVU19QRVJfTk9ERSUzRDglMEFleHBvcnQlMjBNQVNURVJfQUREUiUzRCUyNChzY29udHJvbCUyMHNob3clMjBob3N0bmFtZXMlMjAlMjRTTFVSTV9KT0JfTk9ERUxJU1QlMjAlN0MlMjBoZWFkJTIwLW4lMjAxKSUwQWV4cG9ydCUyME1BU1RFUl9QT1JUJTNEOTkwMSUwQSUwQXNydW4lMjAtLWpvYmlkJTIwJTI0U0xVUk1fSk9CSUQlMjBiYXNoJTIwLWMlMjAncHl0aG9uJTIwLW0lMjB0b3JjaC5kaXN0cmlidXRlZC5ydW4lMjAlNUMlMEElMjAtLW5wcm9jX3Blcl9ub2RlJTIwJTI0R1BVU19QRVJfTk9ERSUyMC0tbm5vZGVzJTIwJTI0U0xVUk1fTk5PREVTJTIwLS1ub2RlX3JhbmslMjAlMjRTTFVSTV9QUk9DSUQlMjAlNUMlMEElMjAtLW1hc3Rlcl9hZGRyJTIwJTI0TUFTVEVSX0FERFIlMjAtLW1hc3Rlcl9wb3J0JTIwJTI0TUFTVEVSX1BPUlQlMjAlNUMlMEF5b3VyX3Byb2dyYW0ucHklMjAlM0Nub3JtYWwlMjBjbCUyMGFyZ3MlM0UlMjAtLWRlZXBzcGVlZCUyMGRzX2NvbmZpZy5qc29uJw==",highlighted:`<span class="hljs-comment">#SBATCH --job-name=test-nodes # 작업 이름</span> | |
| <span class="hljs-comment">#SBATCH --nodes=2 # 노드 수</span> | |
| <span class="hljs-comment">#SBATCH --ntasks-per-node=1 # 중요 - 노드당 분산 작업 1개!</span> | |
| <span class="hljs-comment">#SBATCH --cpus-per-task=10 # 작업당 CPU 코어 수</span> | |
| <span class="hljs-comment">#SBATCH --gres=gpu:8 # gpu 수</span> | |
| <span class="hljs-comment">#SBATCH --time 20:00:00 # 최대 실행 시간 (HH:MM:SS)</span> | |
| <span class="hljs-comment">#SBATCH --output=%x-%j.out # 출력 파일 이름</span> | |
| <span class="hljs-built_in">export</span> GPUS_PER_NODE=8 | |
| <span class="hljs-built_in">export</span> MASTER_ADDR=$(scontrol show hostnames <span class="hljs-variable">$SLURM_JOB_NODELIST</span> | <span class="hljs-built_in">head</span> -n 1) | |
| <span class="hljs-built_in">export</span> MASTER_PORT=9901 | |
| srun --jobid <span class="hljs-variable">$SLURM_JOBID</span> bash -c <span class="hljs-string">'python -m torch.distributed.run \\ | |
| --nproc_per_node $GPUS_PER_NODE --nnodes $SLURM_NNODES --node_rank $SLURM_PROCID \\ | |
| --master_addr $MASTER_ADDR --master_port $MASTER_PORT \\ | |
| your_program.py <normal cl args> --deepspeed ds_config.json'</span>`,wrap:!1}}),Ae=new g({props:{code:"c2JhdGNoJTIwbGF1bmNoLnNsdXJt",highlighted:"sbatch launch.slurm",wrap:!1}}),$e=new k({props:{title:"노트북",local:"notebook",headingTag:"h3"}}),Re=new g({props:{code:"JTIzJTIwRGVlcFNwZWVkJUVCJThBJTk0JTIwJUVCJThCJUE4JUVDJTlEJUJDJTIwJUVEJTk0JTg0JUVCJUExJTlDJUVDJTg0JUI4JUVDJThBJUE0JUVCJUE3JThDJTIwJUVDJTgyJUFDJUVDJTlBJUE5JUVEJTk1JTk4JUVCJThEJTk0JUVCJTlEJUJDJUVCJThGJTg0JTIwJUVCJUI2JTg0JUVDJTgyJUIwJTIwJUVEJTk5JTk4JUVBJUIyJUJEJUVDJTlEJTg0JTIwJUVEJTk1JTg0JUVDJTlBJTk0JUVCJUExJTlDJTIwJUVEJTk1JUE5JUVCJThCJTg4JUVCJThCJUE0LiUwQSUyMyUyMCVFQyU5RCVCNCUyMCVFQyVCRCU5NCVFQiU5MyU5QyVFQiVBMSU5QyUyMCVFQiVCNiU4NCVFQyU4MiVCMCUyMCVFRCU5OSU5OCVFQSVCMiVCRCVFQyU5RCU4NCUyMCVFQiVBQSVBOCVFQiVCMCVBOSVFRCU5NSVBOSVFQiU4QiU4OCVFQiU4QiVBNC4lMEFpbXBvcnQlMjBvcyUwQSUwQW9zLmVudmlyb24lNUIlMjJNQVNURVJfQUREUiUyMiU1RCUyMCUzRCUyMCUyMmxvY2FsaG9zdCUyMiUwQW9zLmVudmlyb24lNUIlMjJNQVNURVJfUE9SVCUyMiU1RCUyMCUzRCUyMCUyMjk5OTQlMjIlMjAlMjAlMjMlMjBSdW50aW1lRXJyb3IlM0ElMjBBZGRyZXNzJTIwYWxyZWFkeSUyMGluJTIwdXNlJTIwJUVDJTk4JUE0JUVCJUE1JTk4JTIwJUVCJUIwJTlDJUVDJTgzJTlEJTIwJUVDJThCJTlDJTIwJUVDJTg4JTk4JUVDJUEwJTk1JTBBb3MuZW52aXJvbiU1QiUyMlJBTkslMjIlNUQlMjAlM0QlMjAlMjIwJTIyJTBBb3MuZW52aXJvbiU1QiUyMkxPQ0FMX1JBTkslMjIlNUQlMjAlM0QlMjAlMjIwJTIyJTBBb3MuZW52aXJvbiU1QiUyMldPUkxEX1NJWkUlMjIlNUQlMjAlM0QlMjAlMjIxJTIyJTBBJTBBJTIzJTIwJUVDJTlEJUI0JUVDJUEwJTlDJTIwJUVEJThGJTg5JUVDJTg2JThDJUVDJTk5JTgwJTIwJUVBJUIwJTk5JUVDJTlEJUI0JTIwJUVDJUE3JTg0JUVEJTk2JTg5JUVEJTk1JTk4JUVCJTkwJTk4JTJDJTIwRGVlcFNwZWVkJTIwJUVDJTg0JUE0JUVDJUEwJTk1JTIwJUVEJThDJThDJUVDJTlEJUJDJUVDJTlEJTg0JTIwJUVDJUEwJTg0JUVCJThCJUFDJUVEJTk1JUE5JUVCJThCJTg4JUVCJThCJUE0LiUwQXRyYWluaW5nX2FyZ3MlMjAlM0QlMjBUcmFpbmluZ0FyZ3VtZW50cyguLi4lMkMlMjBkZWVwc3BlZWQlM0QlMjJkc19jb25maWdfemVybzMuanNvbiUyMiklMEF0cmFpbmVyJTIwJTNEJTIwVHJhaW5lciguLi4pJTBBdHJhaW5lci50cmFpbigp",highlighted:`<span class="hljs-comment"># DeepSpeed는 단일 프로세스만 사용하더라도 분산 환경을 필요로 합니다.</span> | |
| <span class="hljs-comment"># 이 코드로 분산 환경을 모방합니다.</span> | |
| <span class="hljs-keyword">import</span> os | |
| os.environ[<span class="hljs-string">"MASTER_ADDR"</span>] = <span class="hljs-string">"localhost"</span> | |
| os.environ[<span class="hljs-string">"MASTER_PORT"</span>] = <span class="hljs-string">"9994"</span> <span class="hljs-comment"># RuntimeError: Address already in use 오류 발생 시 수정</span> | |
| os.environ[<span class="hljs-string">"RANK"</span>] = <span class="hljs-string">"0"</span> | |
| os.environ[<span class="hljs-string">"LOCAL_RANK"</span>] = <span class="hljs-string">"0"</span> | |
| os.environ[<span class="hljs-string">"WORLD_SIZE"</span>] = <span class="hljs-string">"1"</span> | |
| <span class="hljs-comment"># 이제 평소와 같이 진행하되, DeepSpeed 설정 파일을 전달합니다.</span> | |
| training_args = TrainingArguments(..., deepspeed=<span class="hljs-string">"ds_config_zero3.json"</span>) | |
| trainer = Trainer(...) | |
| trainer.train()`,wrap:!1}}),_e=new g({props:{code:"JTI1JTI1YmFzaCUwQWNhdCUyMCUzQyUzQydFT1QnJTIwJTNFJTIwZHNfY29uZmlnX3plcm8zLmpzb24lMEElN0IlMEElMjAlMjAlMjAlMjAlMjJmcDE2JTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZW5hYmxlZCUyMiUzQSUyMCUyMmF1dG8lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJsb3NzX3NjYWxlJTIyJTNBJTIwMCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmxvc3Nfc2NhbGVfd2luZG93JTIyJTNBJTIwMTAwMCUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmluaXRpYWxfc2NhbGVfcG93ZXIlMjIlM0ElMjAxNiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmh5c3RlcmVzaXMlMjIlM0ElMjAyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybWluX2xvc3Nfc2NhbGUlMjIlM0ElMjAxJTBBJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTBBJTIwJTIwJTIwJTIwJTIyb3B0aW1pemVyJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIydHlwZSUyMiUzQSUyMCUyMkFkYW1XJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycGFyYW1zJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybHIlMjIlM0ElMjAlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyYmV0YXMlMjIlM0ElMjAlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZXBzJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMndlaWdodF9kZWNheSUyMiUzQSUyMCUyMmF1dG8lMjIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMEElMjAlMjAlMjAlMjAlN0QlMkMlMEElMEElMjAlMjAlMjAlMjAlMjJzY2hlZHVsZXIlMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ0eXBlJTIyJTNBJTIwJTIyV2FybXVwTFIlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJwYXJhbXMlMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJ3YXJtdXBfbWluX2xyJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMndhcm11cF9tYXhfbHIlMjIlM0ElMjAlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyd2FybXVwX251bV9zdGVwcyUyMiUzQSUyMCUyMmF1dG8lMjIlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMEElMjAlMjAlMjAlMjAlN0QlMkMlMEElMEElMjAlMjAlMjAlMjAlMjJ6ZXJvX29wdGltaXphdGlvbiUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnN0YWdlJTIyJTNBJTIwMyUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm9mZmxvYWRfb3B0aW1pemVyJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZGV2aWNlJTIyJTNBJTIwJTIyY3B1JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycGluX21lbW9yeSUyMiUzQSUyMHRydWUlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJvZmZsb2FkX3BhcmFtJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZGV2aWNlJTIyJTNBJTIwJTIyY3B1JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycGluX21lbW9yeSUyMiUzQSUyMHRydWUlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJvdmVybGFwX2NvbW0lMjIlM0ElMjB0cnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyY29udGlndW91c19ncmFkaWVudHMlMjIlM0ElMjB0cnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyc3ViX2dyb3VwX3NpemUlMjIlM0ElMjAxZTklMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJyZWR1Y2VfYnVja2V0X3NpemUlMjIlM0ElMjAlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyc3RhZ2UzX3ByZWZldGNoX2J1Y2tldF9zaXplJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMnN0YWdlM19wYXJhbV9wZXJzaXN0ZW5jZV90aHJlc2hvbGQlMjIlM0ElMjAlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyc3RhZ2UzX21heF9saXZlX3BhcmFtZXRlcnMlMjIlM0ElMjAxZTklMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFnZTNfbWF4X3JldXNlX2Rpc3RhbmNlJTIyJTNBJTIwMWU5JTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyc3RhZ2UzX2dhdGhlcl8xNmJpdF93ZWlnaHRzX29uX21vZGVsX3NhdmUlMjIlM0ElMjB0cnVlJTBBJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTBBJTIwJTIwJTIwJTIwJTIyZ3JhZGllbnRfYWNjdW11bGF0aW9uX3N0ZXBzJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMmdyYWRpZW50X2NsaXBwaW5nJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMnN0ZXBzX3Blcl9wcmludCUyMiUzQSUyMDIwMDAlMkMlMEElMjAlMjAlMjAlMjAlMjJ0cmFpbl9iYXRjaF9zaXplJTIyJTNBJTIwJTIyYXV0byUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMnRyYWluX21pY3JvX2JhdGNoX3NpemVfcGVyX2dwdSUyMiUzQSUyMCUyMmF1dG8lMjIlMkMlMEElMjAlMjAlMjAlMjAlMjJ3YWxsX2Nsb2NrX2JyZWFrZG93biUyMiUzQSUyMGZhbHNlJTBBJTdEJTBBRU9U",highlighted:`%%bash | |
| cat <<<span class="hljs-string">'EOT'</span> > ds_config_zero3.json | |
| { | |
| <span class="hljs-string">"fp16"</span>: { | |
| <span class="hljs-string">"enabled"</span>: <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-string">"loss_scale"</span>: <span class="hljs-number">0</span>, | |
| <span class="hljs-string">"loss_scale_window"</span>: <span class="hljs-number">1000</span>, | |
| <span class="hljs-string">"initial_scale_power"</span>: <span class="hljs-number">16</span>, | |
| <span class="hljs-string">"hysteresis"</span>: <span class="hljs-number">2</span>, | |
| <span class="hljs-string">"min_loss_scale"</span>: <span class="hljs-number">1</span> | |
| }, | |
| <span class="hljs-string">"optimizer"</span>: { | |
| <span class="hljs-string">"type"</span>: <span class="hljs-string">"AdamW"</span>, | |
| <span class="hljs-string">"params"</span>: { | |
| <span class="hljs-string">"lr"</span>: <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-string">"betas"</span>: <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-string">"eps"</span>: <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-string">"weight_decay"</span>: <span class="hljs-string">"auto"</span> | |
| } | |
| }, | |
| <span class="hljs-string">"scheduler"</span>: { | |
| <span class="hljs-string">"type"</span>: <span class="hljs-string">"WarmupLR"</span>, | |
| <span class="hljs-string">"params"</span>: { | |
| <span class="hljs-string">"warmup_min_lr"</span>: <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-string">"warmup_max_lr"</span>: <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-string">"warmup_num_steps"</span>: <span class="hljs-string">"auto"</span> | |
| } | |
| }, | |
| <span class="hljs-string">"zero_optimization"</span>: { | |
| <span class="hljs-string">"stage"</span>: <span class="hljs-number">3</span>, | |
| <span class="hljs-string">"offload_optimizer"</span>: { | |
| <span class="hljs-string">"device"</span>: <span class="hljs-string">"cpu"</span>, | |
| <span class="hljs-string">"pin_memory"</span>: true | |
| }, | |
| <span class="hljs-string">"offload_param"</span>: { | |
| <span class="hljs-string">"device"</span>: <span class="hljs-string">"cpu"</span>, | |
| <span class="hljs-string">"pin_memory"</span>: true | |
| }, | |
| <span class="hljs-string">"overlap_comm"</span>: true, | |
| <span class="hljs-string">"contiguous_gradients"</span>: true, | |
| <span class="hljs-string">"sub_group_size"</span>: <span class="hljs-number">1e9</span>, | |
| <span class="hljs-string">"reduce_bucket_size"</span>: <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-string">"stage3_prefetch_bucket_size"</span>: <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-string">"stage3_param_persistence_threshold"</span>: <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-string">"stage3_max_live_parameters"</span>: <span class="hljs-number">1e9</span>, | |
| <span class="hljs-string">"stage3_max_reuse_distance"</span>: <span class="hljs-number">1e9</span>, | |
| <span class="hljs-string">"stage3_gather_16bit_weights_on_model_save"</span>: true | |
| }, | |
| <span class="hljs-string">"gradient_accumulation_steps"</span>: <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-string">"gradient_clipping"</span>: <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-string">"steps_per_print"</span>: <span class="hljs-number">2000</span>, | |
| <span class="hljs-string">"train_batch_size"</span>: <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-string">"train_micro_batch_size_per_gpu"</span>: <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-string">"wall_clock_breakdown"</span>: false | |
| } | |
| EOT`,wrap:!1}}),be=new g({props:{code:"IWdpdCUyMGNsb25lJTIwaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGaHVnZ2luZ2ZhY2UlMkZ0cmFuc2Zvcm1lcnMlMEEhY2QlMjB0cmFuc2Zvcm1lcnMlM0IlMjBkZWVwc3BlZWQlMjBleGFtcGxlcyUyRnB5dG9yY2glMkZ0cmFuc2xhdGlvbiUyRnJ1bl90cmFuc2xhdGlvbi5weSUyMC4uLg==",highlighted:`!git clone https://github.com/huggingface/transformers | |
| !cd transformers; deepspeed examples/pytorch/translation/run_translation.py ...`,wrap:!1}}),Ze=new g({props:{code:"JTI1JTI1YmFzaCUwQSUwQWdpdCUyMGNsb25lJTIwaHR0cHMlM0ElMkYlMkZnaXRodWIuY29tJTJGaHVnZ2luZ2ZhY2UlMkZ0cmFuc2Zvcm1lcnMlMEFjZCUyMHRyYW5zZm9ybWVycyUwQWRlZXBzcGVlZCUyMGV4YW1wbGVzJTJGcHl0b3JjaCUyRnRyYW5zbGF0aW9uJTJGcnVuX3RyYW5zbGF0aW9uLnB5JTIwLi4u",highlighted:`%%bash | |
| git clone https://github.com/huggingface/transformers | |
| cd transformers | |
| deepspeed examples/pytorch/translation/run_translation.py ...`,wrap:!1}}),Ne=new k({props:{title:"모델 가중치 저장하기",local:"save-model-weights",headingTag:"h2"}}),Il=new Jl({props:{id:"save",options:["fp16","fp32"],$$slots:{default:[CJ]},$$scope:{ctx:A}}}),Se=new k({props:{title:"ZeRO Inference",local:"zero-inference",headingTag:"h2"}}),We=new g({props:{code:"ZGVlcHNwZWVkJTIwLS1udW1fZ3B1cyUzRDIlMjB5b3VyX3Byb2dyYW0ucHklMjAlM0Nub3JtYWwlMjBjbCUyMGFyZ3MlM0UlMjAtLWRvX2V2YWwlMjAtLWRlZXBzcGVlZCUyMGRzX2NvbmZpZy5qc29u",highlighted:"deepspeed --num_gpus=2 your_program.py <normal cl args> --do_eval --deepspeed ds_config.json",wrap:!1}}),qe=new k({props:{title:"Trainer 없이 DeepSpeed 사용하기",local:"non-trainer-deepspeed-integration",headingTag:"h2"}}),cl=new al({props:{$$slots:{default:[wJ]},$$scope:{ctx:A}}}),ul=new Jl({props:{id:"models",options:["pretrained model","non-pretrained model"],$$slots:{default:[cJ]},$$scope:{ctx:A}}}),Xe=new k({props:{title:"Trainer 없이 ZeRO Inference 사용하기",local:"non-trainer-zero-inference",headingTag:"h3"}}),Le=new g({props:{code:"JTIzISUyRnVzciUyRmJpbiUyRmVudiUyMHB5dGhvbiUwQSUwQSUyMyUyMCVFQyU5RCVCNCUyMCVFQyU4QSVBNCVFRCU4MSVBQyVFQiVBNiVCRCVFRCU4QSVCOCVFQiU4QSU5NCUyMCVFQiU4QiVBOCVFQyU5RCVCQyUyMEdQVSVFQyU5NyU5MCUyMCVFQiVBQSVBOCVFQiU4RCVCOCVFQyU5RCU4NCUyMCVFQiVBNyU5RSVFQyVCNiU5QyUyMCVFQyU4OCU5OCUyMCVFQyU5NyU4NiVFQyU5RCU4NCUyMCVFQiU5NSU4QyUyMCVFQyVCNiU5NCVFQiVBMSVBMCUyMCVFQiVBQSVBOCVFQiU5MyU5QyVFQyU5NyU5MCVFQyU4NCU5QyUyMERlZXBzcGVlZCUyMFplUk8lRUIlQTUlQkMlMjAlRUMlODIlQUMlRUMlOUElQTklRUQlOTUlOTglRUIlOEElOTQlMjAlRUIlQjAlQTklRUIlQjIlOTUlRUMlOUQlODQlMjAlRUIlQjMlQjQlRUMlOTclQUMlRUMlQTQlOEQlRUIlOEIlODglRUIlOEIlQTQuJTBBJTIzJTBBJTIzJTIwMS4lMjBDUFUlMjAlRUMlOTglQTQlRUQlOTQlODQlRUIlQTElOUMlRUIlOTMlOUMlRUMlOTklODAlMjAlRUQlOTUlQTglRUElQkIlOTglMjAxJUVBJUIwJTlDJUVDJTlEJTk4JTIwR1BVJTIwJUVDJTgyJUFDJUVDJTlBJUE5JTBBJTIzJTIwMi4lMjAlRUIlOTglOTAlRUIlOEElOTQlMjAlRUMlOTclQUMlRUIlOUYlQUMlMjBHUFUlMjAlRUMlODIlQUMlRUMlOUElQTklMEElMjMlMEElMjMlMjAlRUIlQTglQkMlRUMlQTAlODAlMjBkZWVwc3BlZWQlRUIlQTUlQkMlMjAlRUMlODQlQTQlRUMlQjklOTglRUQlOTUlQjQlRUMlOTUlQkMlMjAlRUQlOTUlQTklRUIlOEIlODglRUIlOEIlQTQlM0ElMjBwaXAlMjBpbnN0YWxsJTIwZGVlcHNwZWVkJTBBJTIzJTBBJTIzJTIwJUVDJTk3JUFDJUVBJUI4JUIwJUVDJTg0JTlDJUVCJThBJTk0JTIwJUVDJTk1JUJEJTIwMTVHQiVFQyU5RCU5OCUyMEdQVSUyMFJBTSVFQyU5RCVCNCUyMCVFRCU5NSU4NCVFQyU5QSU5NCVFRCU5NSU5QyUyMDNCJTIwJTIyYmlnc2NpZW5jZSUyRlQwXzNCJTIyJTIwJUVCJUFBJUE4JUVCJThEJUI4JUVDJTlEJTg0JTIwJUVDJTgyJUFDJUVDJTlBJUE5JUVEJTk1JUE5JUVCJThCJTg4JUVCJThCJUE0JTIwLSUyMCVFQiU5NCVCMCVFQiU5RCVCQyVFQyU4NCU5QyUyMDElRUElQjAlOUMlRUMlOUQlOTglMjAlRUQlODElQjAlMjBHUFUlRUIlODIlOTglMjAyJUVBJUIwJTlDJUVDJTlEJTk4JTBBJTIzJTIwJUVDJTlFJTkxJUVDJTlEJTgwJTIwR1BVJUVCJUExJTlDJTIwJUVDJUIyJTk4JUVCJUE2JUFDJUVEJTk1JUEwJTIwJUVDJTg4JTk4JTIwJUVDJTlFJTg4JUVDJThBJUI1JUVCJThCJTg4JUVCJThCJUE0LiUyMCVFQiU5OCU5MCVFQiU4QSU5NCUyMDElRUElQjAlOUMlRUMlOUQlOTglMjAlRUMlOUUlOTElRUMlOUQlODAlMjBHUFUlRUMlOTklODAlMjAlRUIlQTclOEUlRUMlOUQlODAlMjBDUFUlMjAlRUIlQTklOTQlRUIlQUElQTglRUIlQTYlQUMlRUIlQTElOUMlRUIlOEYlODQlMjAlRUElQjAlODAlRUIlOEElQTUlRUQlOTUlQTklRUIlOEIlODglRUIlOEIlQTQuJTBBJTIzJTBBJTIzJTIwJUVDJTk1JUJEJTIwNTBHQiVFQSVCMCU4MCUyMCVFRCU5NSU4NCVFQyU5QSU5NCVFRCU5NSU5QyUyMCUyMmJpZ3NjaWVuY2UlMkZUMCUyMiVFQyU5OSU4MCUyMCVFQSVCMCU5OSVFQyU5RCU4MCUyMCVFQiU4RCU5NCUyMCVFRCU4MSVCMCUyMCVFQiVBQSVBOCVFQiU4RCVCOCVFQyU5RCU4NCUyMCVFQyU4MiVBQyVFQyU5QSVBOSVFRCU5NSU5OCVFQiVBMCVBNCVFQiVBOSVCNCUyQyUyMDgwR0IlMjBHUFUlRUElQjAlODAlMjAlRUMlOTclODYlRUIlOEElOTQlMjAlRUQlOTUlOUMlMEElMjMlMjAyLTQlRUElQjAlOUMlRUMlOUQlOTglMjBHUFUlRUElQjAlODAlMjAlRUQlOTUlODQlRUMlOUElOTQlRUQlOTUlQTAlMjAlRUElQjIlODMlRUMlOUUlODUlRUIlOEIlODglRUIlOEIlQTQuJTIwJUVBJUI3JUI4JUVCJUE2JUFDJUVBJUIzJUEwJTIwJUVDJTk3JUFDJUVCJTlGJUFDJTIwJUVDJTlFJTg1JUVCJUEwJUE1JUVDJTlEJTg0JTIwJUVEJTk1JTlDJTIwJUVCJUIyJTg4JUVDJTk3JTkwJTIwJUVDJUIyJTk4JUVCJUE2JUFDJUVEJTk1JTk4JUVBJUIzJUEwJTIwJUVDJThCJUI2JUVCJThCJUE0JUVCJUE5JUI0JTBBJTIzJTIwJUVDJThBJUE0JUVEJTgxJUFDJUVCJUE2JUJEJUVEJThBJUI4JUVCJUE1JUJDJTIwJUVDJTg4JTk4JUVDJUEwJTk1JUVEJTk1JTk4JUVDJTk3JUFDJTIwJUVCJThEJTk0JTIwJUVCJUE3JThFJUVDJTlEJTgwJTIwR1BVJUVCJUE1JUJDJTIwJUVDJUIyJTk4JUVCJUE2JUFDJUVEJTk1JUEwJTIwJUVDJTg4JTk4JTIwJUVDJTlFJTg4JUVDJThBJUI1JUVCJThCJTg4JUVCJThCJUE0LiUwQSUyMyUwQSUyMyUyMCVFQyVBMCU5QyVFQSVCMyVCNSVFQiU5MCU5QyUyMGRlZXBzcGVlZCUyMCVFQyU4NCVBNCVFQyVBMCU5NSVFQyU5RCU4MCUyMENQVSUyMCVFQiVBOSU5NCVFQiVBQSVBOCVFQiVBNiVBQyUyMCVFQyU5OCVBNCVFRCU5NCU4NCVFQiVBMSU5QyVFQiU5NCVBOSVFQiU4RiU4NCUyMCVFRCU5OSU5QyVFQyU4NCVCMSVFRCU5OSU5NCVFRCU5NSU5OCVFQiVBRiU4MCVFQiVBMSU5QyUyQyUyMCVFQyU4MiVBQyVFQyU5QSVBOSUyMCVFQSVCMCU4MCVFQiU4QSVBNSVFRCU5NSU5QyUyMENQVSUyMCVFQiVBOSU5NCVFQiVBQSVBOCVFQiVBNiVBQyVFQSVCMCU4MCUyMCVFQiVBNyU4RSVFQSVCMyVBMCUwQSUyMyUyMCVFQyU4NiU4RCVFQiU4RiU4NCUyMCVFQyVBMCU4MCVFRCU5NSU5OCVFQiVBNSVCQyUyMCVFQSVCMCU5MCVFQyU4OCU5OCVFRCU5NSVBMCUyMCVFQyU4OCU5OCUyMCVFQyU5RSU4OCVFQiU4QiVBNCVFQiVBOSVCNCUyMCVFQyU5RCVCQyVFQiVCMCU5OCVFQyVBMCU4MSVFQyU5QyVCQyVFQiVBMSU5QyUyMCVFQiU4QiVBOCVFQyU5RCVCQyUyMEdQVSVFQyU5NyU5MCUyMCVFQiVBNyU5RSVFQyVBNyU4MCUyMCVFQyU5NSU4QSVFQiU4QSU5NCUyMCVFQiVBQSVBOCVFQiU4RCVCOCVFQyU5RCU4NCUyMCVFQiVBMSU5QyVFQiU5MyU5QyVFRCU5NSVBMCUyMCVFQyU4OCU5OCUyMCVFQyU5RSU4OCVFQyU5RCU4NCUyMCVFQSVCMiU4MyVFQyU5RSU4NSVFQiU4QiU4OCVFQiU4QiVBNC4lMEElMjMlMjBHUFUlMjAlRUIlQTklOTQlRUIlQUElQTglRUIlQTYlQUMlRUElQjAlODAlMjAlRUMlQjYlQTklRUIlQjYlODQlRUQlOTUlOTglRUIlOEIlQTQlRUIlQTklQjQlMjBDUFUlRUIlQTElOUMlRUMlOUQlOTglMjAlRUMlOTglQTQlRUQlOTQlODQlRUIlQTElOUMlRUIlOTMlOUMlRUIlQTUlQkMlMjAlRUMlOUIlOTAlRUQlOTUlOTglRUMlQTclODAlMjAlRUMlOTUlOEElRUMlOUQlODQlMjAlRUIlOTUlOEMlMjAlRUQlOTQlODQlRUIlQTElOUMlRUElQjclQjglRUIlOUUlQTglRUMlOUQlQjQlMjAlRUIlOEQlOTQlMjAlRUIlQjklQTAlRUIlQTUlQjQlRUElQjIlOEMlMjAlRUMlOEIlQTQlRUQlOTYlODklRUIlOTAlQTAlMjAlRUElQjIlODMlRUMlOUUlODUlRUIlOEIlODglRUIlOEIlQTQlMjAtJTIwJUVBJUI3JUI4JUVCJTlGJUI0JTIwJUVCJTk1JThDJUVCJThBJTk0JTIwJUVEJTk1JUI0JUVCJThCJUI5JTIwJUVDJTg0JUI5JUVDJTg1JTk4JUVDJTlEJTg0JTIwJUVCJUI5JTg0JUVEJTk5JTlDJUVDJTg0JUIxJUVEJTk5JTk0JUVEJTk1JTk4JUVDJTg0JUI4JUVDJTlBJTk0LiUwQSUyMyUwQSUyMyUyMDElRUElQjAlOUMlRUMlOUQlOTglMjBHUFUlRUMlOTclOTAlMjAlRUIlQjAlQjAlRUQlOEYlQUMlRUQlOTUlOTglRUIlQTAlQTQlRUIlQTklQjQlM0ElMEElMjMlMEElMjMlMjBkZWVwc3BlZWQlMjAtLW51bV9ncHVzJTIwMSUyMHQwLnB5JTBBJTIzJTIwJUVCJTk4JTkwJUVCJThBJTk0JTNBJTBBJTIzJTIwcHl0aG9uJTIwLW0lMjB0b3JjaC5kaXN0cmlidXRlZC5ydW4lMjAtLW5wcm9jX3Blcl9ub2RlJTNEMSUyMHQwLnB5JTBBJTIzJTBBJTIzJTIwMiVFQSVCMCU5QyVFQyU5RCU5OCUyMEdQVSVFQyU5NyU5MCUyMCVFQiVCMCVCMCVFRCU4RiVBQyVFRCU5NSU5OCVFQiVBMCVBNCVFQiVBOSVCNCUzQSUwQSUyMyUwQSUyMyUyMGRlZXBzcGVlZCUyMC0tbnVtX2dwdXMlMjAyJTIwdDAucHklMEElMjMlMjAlRUIlOTglOTAlRUIlOEElOTQlM0ElMEElMjMlMjBweXRob24lMjAtbSUyMHRvcmNoLmRpc3RyaWJ1dGVkLnJ1biUyMC0tbnByb2NfcGVyX25vZGUlM0QyJTIwdDAucHklMEElMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Rva2VuaXplciUyQyUyMEF1dG9Db25maWclMkMlMjBBdXRvTW9kZWxGb3JTZXEyU2VxTE0lMEFmcm9tJTIwdHJhbnNmb3JtZXJzLmludGVncmF0aW9ucyUyMGltcG9ydCUyMEhmRGVlcFNwZWVkQ29uZmlnJTBBaW1wb3J0JTIwZGVlcHNwZWVkJTBBaW1wb3J0JTIwb3MlMEFpbXBvcnQlMjB0b3JjaCUwQSUwQW9zLmVudmlyb24lNUIlMjJUT0tFTklaRVJTX1BBUkFMTEVMSVNNJTIyJTVEJTIwJTNEJTIwJTIyZmFsc2UlMjIlMjAlMjAlMjMlMjAlRUQlODYlQTAlRUQlODElQUMlRUIlODIlOTglRUMlOUQlQjQlRUMlQTAlODAlRUMlOUQlOTglMjAlRUIlQjMlOTElRUIlQTAlQUMlMjAlRUMlQjIlOTglRUIlQTYlQUMlRUMlOTclOTAlMjAlRUElQjQlODAlRUQlOTUlOUMlMjAlRUElQjIlQkQlRUElQjMlQTAlRUIlQTUlQkMlMjAlRUQlOTQlQkMlRUQlOTUlOTglRUElQjglQjAlMjAlRUMlOUMlODQlRUQlOTUlQTglRUMlOUUlODUlRUIlOEIlODglRUIlOEIlQTQuJTBBJTBBJTIzJTIwJUVCJUI2JTg0JUVDJTgyJUIwJTIwJUVEJTk5JTk4JUVBJUIyJUJEJTIwJUVDJTg0JUE0JUVDJUEwJTk1JTBBbG9jYWxfcmFuayUyMCUzRCUyMGludChvcy5nZXRlbnYoJTIyTE9DQUxfUkFOSyUyMiUyQyUyMCUyMjAlMjIpKSUwQXdvcmxkX3NpemUlMjAlM0QlMjBpbnQob3MuZ2V0ZW52KCUyMldPUkxEX1NJWkUlMjIlMkMlMjAlMjIxJTIyKSklMEF0b3JjaC5jdWRhLnNldF9kZXZpY2UobG9jYWxfcmFuayklMEFkZWVwc3BlZWQuaW5pdF9kaXN0cmlidXRlZCgpJTBBJTBBbW9kZWxfbmFtZSUyMCUzRCUyMCUyMmJpZ3NjaWVuY2UlMkZUMF8zQiUyMiUwQSUwQWNvbmZpZyUyMCUzRCUyMEF1dG9Db25maWcuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX25hbWUpJTBBbW9kZWxfaGlkZGVuX3NpemUlMjAlM0QlMjBjb25maWcuZF9tb2RlbCUwQSUwQSUyMyUyMCVFQiVCMCVCMCVFQyVCOSU5OCUyMCVFRCU4MSVBQyVFQSVCOCVCMCVFQiU4QSU5NCUyMHdvcmxkX3NpemUlRUIlQTElOUMlMjAlRUIlODIlOTglRUIlODglODQlRUMlOTYlQjQlMjAlRUIlOTYlQTglRUMlOTYlQjQlRUMlQTAlQjglRUMlOTUlQkMlMjAlRUQlOTUlOTglRUMlQTclODAlRUIlQTclOEMlMkMlMjB3b3JsZF9zaXplJUVCJUIzJUI0JUVCJThCJUE0JTIwJUVEJTgxJUI0JTIwJUVDJTg4JTk4JTIwJUVDJTlFJTg4JUVDJThBJUI1JUVCJThCJTg4JUVCJThCJUE0JTBBdHJhaW5fYmF0Y2hfc2l6ZSUyMCUzRCUyMDElMjAqJTIwd29ybGRfc2l6ZSUwQSUwQSUyMyUyMGRzX2NvbmZpZyUyMCVFQyVCMCVCOCVFQSVCMyVBMCVFQyU4MiVBQyVFRCU5NSVBRCUwQSUyMyUwQSUyMyUyMC0lMjBBbXBlcmUlMjAlRUMlOUQlQjQlRUMlODMlODElRUMlOUQlOTglMjBHUFUlRUIlQTUlQkMlMjAlRUMlODIlQUMlRUMlOUElQTklRUQlOTUlOTglRUIlOEElOTQlMjAlRUElQjIlQkQlRUMlOUElQjAlMjBiZjE2JUVDJTlEJTg0JTIwJUVEJTk5JTlDJUVDJTg0JUIxJUVEJTk5JTk0JUVEJTk1JTk4JUVDJTg0JUI4JUVDJTlBJTk0JTIwLSUyMCVFQyU5RCVCNCVFQiU4QSU5NCUyMCVFRCU5OCVCQyVFRCU5NSVBOSUyMCVFQyVBMCU5NSVFQiVCMCU4MCVFQiU4RiU4NCVFQiVBMSU5QyUyMCVFQyU4QiVBNCVFRCU5NiU4OSVFQiU5MCU5OCVFQyU5NiVCNCUwQSUyMyUyMCVFQiU4RCU5NCUyMCVFQiVCOSVBMCVFQiVBNSVCQyUyMCVFQSVCMiU4MyVFQyU5RSU4NSVFQiU4QiU4OCVFQiU4QiVBNC4lMEElMjMlMEElMjMlMjAtJTIwJUVDJTk4JUE0JUVCJTlFJTk4JUVCJTkwJTlDJTIwR1BVJUVDJTlEJTk4JTIwJUVBJUIyJUJEJUVDJTlBJUIwJTIwZnAxNiVFQyU5RCU4NCUyMCVFRCU5OSU5QyVFQyU4NCVCMSVFRCU5OSU5NCVFRCU5NSVBMCUyMCVFQyU4OCU5OCUyMCVFQyU5RSU4OCVFQyVBNyU4MCVFQiVBNyU4QyUyQyUyMGJmMTYlRUMlOUMlQkMlRUIlQTElOUMlMjAlRUMlODIlQUMlRUMlQTAlODQlMjAlRUQlOUIlODglRUIlQTAlQTglRUIlOTAlOTglRUMlQTclODAlMjAlRUMlOTUlOEElRUMlOUQlODAlMjAlRUIlQUElQTglRUIlOEQlQjglRUMlOTclOTAlRUMlODQlOUMlRUIlQTclOEMlMjAlRUMlOUUlOTElRUIlOEYlOTklRUQlOTUlQTklRUIlOEIlODglRUIlOEIlQTQlMjAtJTIwJUVDJTk4JTg4JUVCJUE1JUJDJTIwJUVCJTkzJUE0JUVDJTk2JUI0JTBBJTIzJTIwJUVCJUFBJUE4JUVCJTkzJUEwJTIwJUVBJUIzJUI1JUVDJThCJTlEJTIwdDUlMjAlRUIlQUElQTglRUIlOEQlQjglRUMlOUQlODAlMjBiZjE2JUVDJTlDJUJDJUVCJUExJTlDJTIwJUVDJTgyJUFDJUVDJUEwJTg0JTIwJUVEJTlCJTg4JUVCJUEwJUE4JUVCJTkwJTk4JUVDJTk3JTg4JUVDJThBJUI1JUVCJThCJTg4JUVCJThCJUE0JTBBJTIzJTBBJTIzJTIwLSUyMENQVSUyMCVFQyU5OCVBNCVFRCU5NCU4NCVFQiVBMSU5QyVFQiU5MyU5QyVFQiVBNSVCQyUyMCVFQyU5QiU5MCVFRCU5NSU5OCVFQyVBNyU4MCUyMCVFQyU5NSU4QSVFQiU4QSU5NCVFQiU4QiVBNCVFQiVBOSVCNCUyMG9mZmxvYWRfcGFyYW0uZGV2aWNlJUVCJUE1JUJDJTIwJTIybm9uZSUyMiVFQyU5QyVCQyVFQiVBMSU5QyUyMCVFQyU4NCVBNCVFQyVBMCU5NSVFRCU5NSU5OCVFQSVCMSVCMCVFQiU4MiU5OCUyMCU2MG9mZmxvYWRfcGFyYW0lNjAlMjAlRUMlODQlQjklRUMlODUlOTglRUMlOUQlODQlMEElMjMlMjAlRUMlOTklODQlRUMlQTAlODQlRUQlOUUlODglMjAlRUMlQTAlOUMlRUElQjElQjAlRUQlOTUlOTglRUMlODQlQjglRUMlOUElOTQlMEElMjMlMEElMjMlMjAtJTIwJTYwb2ZmbG9hZF9wYXJhbSU2MCVFQyU5RCU4NCUyMCVFQyU4MiVBQyVFQyU5QSVBOSVFRCU5NSU5OCVFQiU4QSU5NCUyMCVFQSVCMiVCRCVFQyU5QSVCMCUyQyUyMHN0YWdlM19wYXJhbV9wZXJzaXN0ZW5jZV90aHJlc2hvbGQlRUIlQTUlQkMlMjAlRUMlODglOTglRUIlOEYlOTklRUMlOUMlQkMlRUIlQTElOUMlMjAlRUIlQUYlQjglRUMlODQlQjglMjAlRUMlQTElQjAlRUMlQTAlOTUlRUQlOTUlOTglRUMlOTclQUMlMEElMjMlMjAlRUMlOTYlQjQlRUIlOTYlQTQlMjAlRUIlQTclQTQlRUElQjAlOUMlRUIlQjMlODAlRUMlODglOTglRUElQjAlODAlMjBHUFUlRUMlOTclOTAlMjAlRUIlODIlQTglRUMlOTUlODQlRUMlOUUlODglRUMlOTYlQjQlRUMlOTUlQkMlMjAlRUQlOTUlOTglRUIlOEElOTQlRUMlQTclODAlMjAlRUMlQTAlOUMlRUMlOTYlQjQlRUQlOTUlQTAlMjAlRUMlODglOTglMjAlRUMlOUUlODglRUMlOEElQjUlRUIlOEIlODglRUIlOEIlQTQlMjAtJTIwJUVBJUIwJTkyJUVDJTlEJUI0JTIwJUVEJTgxJUI0JUVDJTg4JTk4JUVCJUExJTlEJTIwJUVDJTk4JUE0JUVEJTk0JTg0JUVCJUExJTlDJUVCJTkzJTlDJTIwJUVEJTgxJUFDJUVBJUI4JUIwJUVBJUIwJTgwJTIwJUVDJTlFJTkxJUVDJTk1JTg0JUVDJUE3JTkxJUVCJThCJTg4JUVCJThCJUE0JTBBJTIzJTBBJTIzJTIwRGVlcHNwZWVkJTIwJUVDJTg0JUE0JUVDJUEwJTk1JUVDJTk3JTkwJTIwJUVCJThDJTgwJUVEJTk1JTlDJTIwJUVDJTlFJTkwJUVDJTg0JUI4JUVEJTk1JTlDJTIwJUVDJUEwJTk1JUVCJUIzJUI0JUVCJThBJTk0JTIwJUVCJThCJUE0JUVDJTlEJThDJUVDJTlEJTg0JTIwJUVDJUIwJUI4JUVDJUExJUIwJUVEJTk1JTk4JUVDJTg0JUI4JUVDJTlBJTk0JTBBJTIzJTIwaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRvY3MlMkZ0cmFuc2Zvcm1lcnMlMkZtYWluJTJGbWFpbl9jbGFzc2VzJTJGZGVlcHNwZWVkJTBBJTBBJTIzJTIwJUVDJTlEJUJDJUVBJUI0JTgwJUVDJTg0JUIxJUVDJTlEJTg0JTIwJUVDJTlDJTg0JUVEJTk1JUI0JTIwanNvbiVFQSVCMyVCQyUyMCVFQiU4RiU5OSVFQyU5RCVCQyVFRCU5NSU5QyUyMCVFRCU5OCU5NSVFQyU4QiU5RCVFQyU5RCU4NCUyMCVFQyU5QyVBMCVFQyVBNyU4MCVFRCU5NSU5OCVFQiU5MCU5OCUyQyUyMHRydWUlMkZmYWxzZSVFQyU5NyU5MCVFQiU4QSU5NCUyMCVFQyU4NiU4QyVFQiVBQyVCOCVFQyU5RSU5MCVFQiVBNSVCQyUyMCVFQyU4MiVBQyVFQyU5QSVBOSVFRCU5NSVBOSVFQiU4QiU4OCVFQiU4QiVBNCUwQSUyMyUyMGZtdCUzQSUyMG9mZiUwQWRzX2NvbmZpZyUyMCUzRCUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMmZwMTYlMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJlbmFibGVkJTIyJTNBJTIwRmFsc2UlMEElMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjJiZjE2JTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZW5hYmxlZCUyMiUzQSUyMEZhbHNlJTBBJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTIwJTIwJTIwJTIwJTIyemVyb19vcHRpbWl6YXRpb24lMjIlM0ElMjAlN0IlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFnZSUyMiUzQSUyMDMlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJvZmZsb2FkX3BhcmFtJTIyJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyZGV2aWNlJTIyJTNBJTIwJTIyY3B1JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycGluX21lbW9yeSUyMiUzQSUyMFRydWUlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJvdmVybGFwX2NvbW0lMjIlM0ElMjBUcnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIyY29udGlndW91c19ncmFkaWVudHMlMjIlM0ElMjBUcnVlJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIycmVkdWNlX2J1Y2tldF9zaXplJTIyJTNBJTIwbW9kZWxfaGlkZGVuX3NpemUlMjAqJTIwbW9kZWxfaGlkZGVuX3NpemUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFnZTNfcHJlZmV0Y2hfYnVja2V0X3NpemUlMjIlM0ElMjAwLjklMjAqJTIwbW9kZWxfaGlkZGVuX3NpemUlMjAqJTIwbW9kZWxfaGlkZGVuX3NpemUlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJzdGFnZTNfcGFyYW1fcGVyc2lzdGVuY2VfdGhyZXNob2xkJTIyJTNBJTIwMTAlMjAqJTIwbW9kZWxfaGlkZGVuX3NpemUlMEElMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjAlMjJzdGVwc19wZXJfcHJpbnQlMjIlM0ElMjAyMDAwJTJDJTBBJTIwJTIwJTIwJTIwJTIydHJhaW5fYmF0Y2hfc2l6ZSUyMiUzQSUyMHRyYWluX2JhdGNoX3NpemUlMkMlMEElMjAlMjAlMjAlMjAlMjJ0cmFpbl9taWNyb19iYXRjaF9zaXplX3Blcl9ncHUlMjIlM0ElMjAxJTJDJTBBJTIwJTIwJTIwJTIwJTIyd2FsbF9jbG9ja19icmVha2Rvd24lMjIlM0ElMjBGYWxzZSUwQSU3RCUwQSUyMyUyMGZtdCUzQSUyMG9uJTBBJTBBJTIzJTIwJUVCJThCJUE0JUVDJTlEJThDJTIwJUVDJUE0JTg0JUVDJTlEJTgwJTIwJUVCJUFBJUE4JUVCJThEJUI4JUVDJTlEJTk4JTIwJTYwZnJvbV9wcmV0cmFpbmVkJTYwJTIwJUVCJUE5JTk0JUVDJTg2JThDJUVCJTkzJTlDJUVBJUIwJTgwJTIwJUVEJTk4JUI4JUVDJUI2JTlDJUVCJTkwJUEwJTIwJUVCJTk1JThDJTBBJTIzJTIwZGVlcHNwZWVkLnplcm8uSW5pdCVFQiVBNSVCQyUyMCVFQyU4MiVBQyVFQyU5QSVBOSVFRCU5NSU5OCVFQyU5NyVBQyUyMCVFQiVBQSVBOCVFQiU4RCVCOCVFQyU5RCU4NCUyMCVFQyU5NyVBQyVFQiU5RiVBQyUyMEdQVSVFQyU5NyU5MCUyMCVFQyVBNyU4MSVFQyVBMCU5MSUyMCVFQiVCNiU4NCVFRCU5NSVBMCVFRCU5NSU5OCVFQiU4RiU4NCVFQiVBMSU5RCUyMHRyYW5zZm9ybWVycyVFQyU5NyU5MCUyMCVFQyVBNyU4MCVFQyU4QiU5QyVFRCU5NSVBOSVFQiU4QiU4OCVFQiU4QiVBNC4lMEElMjMlMEElMjMlMjAqKiVFQyU5RCVCNCVFQiU4QSU5NCUyMEF1dG9Nb2RlbEZvclNlcTJTZXFMTS5mcm9tX3ByZXRyYWluZWQobW9kZWxfbmFtZSklRUIlQTElOUMlMjAlRUIlQUElQTglRUIlOEQlQjglRUMlOUQlODQlMjAlRUIlQTElOUMlRUIlOTMlOUMlRUQlOTUlOTglRUElQjglQjAlMjAlRUMlQTAlODQlRUMlOTclOTAlMjAlRUMlOEIlQTQlRUQlOTYlODklRUIlOTAlOTglRUMlOTYlQjQlRUMlOTUlQkMlMjAlRUQlOTUlQTklRUIlOEIlODglRUIlOEIlQTQqKiUwQSUyMyUwQSUyMyUyMCVFQSVCNyVCOCVFQiVBMCU4NyVFQyVBNyU4MCUyMCVFQyU5NSU4QSVFQyU5QyVCQyVFQiVBOSVCNCUyMCVFQiVBQSVBOCVFQiU4RCVCOCVFQyU5RCVCNCUyMCVFQiVBOCVCQyVFQyVBMCU4MCUyMCVFQyVBMCU5NSVFQyU4MyU4MSVFQyVBMCU4MSVFQyU5QyVCQyVFQiVBMSU5QyUyMCVFQiVBMSU5QyVFQiU5MyU5QyVFQiU5MCU5QyUyMCVFRCU5QiU4NCUyMCVFRCU4RiVBQyVFQyU5QiU4QyVFQiU5MyU5QyUyMCVFQyU4QiU5QyVFQyU5NyU5MCVFQiVBNyU4QyUyMCVFQiVCNiU4NCVFRCU5NSVBMCVFQiU5MCU5OCVFQiU4QSU5NCVFQiU4RCVCMCUyQyUyMCVFQyU5RCVCNCVFQiU4QSU5NCUwQSUyMyUyMCVFQiU4RCU5QyUyMCVFRCU5QSVBOCVFQyU5QyVBOCVFQyVBMCU4MSVFQyU5RCVCNCVFQiVBOSVCMCUyMENQVSUyMFJBTSVFQyU5RCVCNCUyMCVFQiVCNiU4MCVFQyVBMSVCMSVFRCU5NSVBMCUyMCVFQSVCMiVCRCVFQyU5QSVCMCUyMCVFQyU4QiVBNCVFRCU4QyVBOCVFRCU5NSVBMCUyMCVFQyU4OCU5OCUyMCVFQyU5RSU4OCVFQyU4QSVCNSVFQiU4QiU4OCVFQiU4QiVBNCUwQWRzY2hmJTIwJTNEJTIwSGZEZWVwU3BlZWRDb25maWcoZHNfY29uZmlnKSUyMCUyMCUyMyUyMCVFQyU5RCVCNCUyMCVFQSVCMCU5RCVFQyVCMiVCNCVFQiVBNSVCQyUyMCVFQyU5QyVBMCVFQyVBNyU4MCVFRCU5NSU5OCVFQyU4NCVCOCVFQyU5QSU5NCUwQSUwQSUyMyUyMCVFQyU5RCVCNCVFQyVBMCU5QyUyMCVFQiVBQSVBOCVFQiU4RCVCOCVFQyU5RCU4NCUyMCVFQiVBMSU5QyVFQiU5MyU5QyVFRCU5NSVBMCUyMCVFQyU4OCU5OCUyMCVFQyU5RSU4OCVFQyU4QSVCNSVFQiU4QiU4OCVFQiU4QiVBNC4lMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvclNlcTJTZXFMTS5mcm9tX3ByZXRyYWluZWQobW9kZWxfbmFtZSklMEElMEElMjMlMjBEZWVwc3BlZWQlMjBaZVJPJUVCJUE1JUJDJTIwJUVDJUI0JTg4JUVBJUI4JUIwJUVEJTk5JTk0JUVEJTk1JTk4JUVBJUIzJUEwJTIwJUVDJTk3JTk0JUVDJUE3JTg0JTIwJUVBJUIwJTlEJUVDJUIyJUI0JUVCJUE3JThDJTIwJUVDJUEwJTgwJUVDJTlFJUE1JTBBZHNfZW5naW5lJTIwJTNEJTIwZGVlcHNwZWVkLmluaXRpYWxpemUobW9kZWwlM0Rtb2RlbCUyQyUyMGNvbmZpZ19wYXJhbXMlM0Rkc19jb25maWcpJTVCMCU1RCUwQWRzX2VuZ2luZS5tb2R1bGUuZXZhbCgpJTIwJTIwJTIzJTIwaW5mZXJlbmNlJTBBJTBBJTIzJTIwRGVlcHNwZWVkJTIwWmVSTyVFQiU4QSU5NCUyMCVFQSVCMCU4MSUyMEdQVSVFQyU5NyU5MCVFQyU4NCU5QyUyMCVFQyU4NCU5QyVFQiVBMSU5QyUyMCVFQSVCNCU4MCVFQiVBMCVBOCUyMCVFQyU5NyU4NiVFQiU4QSU5NCUyMCVFQyU5RSU4NSVFQiVBMCVBNSVFQyU5RCU4NCUyMCVFQyVCMiU5OCVFQiVBNiVBQyVFRCU5NSVBMCUyMCVFQyU4OCU5OCUyMCVFQyU5RSU4OCVFQyU4QSVCNSVFQiU4QiU4OCVFQiU4QiVBNC4lMjAlRUIlOTQlQjAlRUIlOUQlQkMlRUMlODQlOUMlMjAyJUVBJUIwJTlDJUVDJTlEJTk4JTIwR1BVJUVCJUE1JUJDJTIwJUVDJTgyJUFDJUVDJTlBJUE5JUVEJTk1JTk4JUVCJUE5JUI0JTIwJUVEJTk1JTlDJTIwJUVCJUIyJTg4JUVDJTk3JTkwJTIwMiVFQSVCMCU5QyVFQyU5RCU5OCUyMCVFQyU5RSU4NSVFQiVBMCVBNSVFQyU5RCU4NCUyMCVFQyVCMiU5OCVFQiVBNiVBQyVFRCU5NSVBMCUyMCVFQyU4OCU5OCUyMCVFQyU5RSU4OCVFQyU4QSVCNSVFQiU4QiU4OCVFQiU4QiVBNC4lMEElMjMlMjBHUFUlRUIlQTUlQkMlMjAlRUIlOEQlOTQlMjAlRUIlQTclOEUlRUMlOUQlQjQlMjAlRUMlODIlQUMlRUMlOUElQTklRUQlOTUlOTglRUIlOEElOTQlMjAlRUElQjIlQkQlRUMlOUElQjAlMjAlRUElQjclQjglRUMlOTclOTAlMjAlRUIlQTclOUUlRUElQjIlOEMlMjAlRUMlQTElQjAlRUMlQTAlOTUlRUQlOTUlOTglRUMlODQlQjglRUMlOUElOTQuJTBBJTBBJTIzJTIwJUVCJUFDJUJDJUVCJUExJUEwJTIwJUVDJUIyJTk4JUVCJUE2JUFDJUVEJTk1JUEwJTIwJUVDJTlFJTg1JUVCJUEwJUE1JUVDJTlEJUI0JTIwJUVEJTk1JTk4JUVCJTgyJTk4JUVCJUJGJTkwJUVDJTlEJUI0JUVCJTlEJUJDJUVCJUE5JUI0JTIwJUVCJTkxJTkwJTIwR1BVJUVDJTk3JTkwJTIwJUVCJThGJTk5JUVDJTlEJUJDJUVEJTk1JTlDJTIwJUVCJUFDJUI4JUVDJTlFJTkwJUVDJTk3JUI0JUVDJTlEJTg0JTIwJUVDJUEwJTg0JUVCJThCJUFDJUVEJTk1JUI0JUVDJTk1JUJDJTIwJUVEJTk1JUE5JUVCJThCJTg4JUVCJThCJUE0LiUwQSUyMyUyMEdQVSVFQiVBNSVCQyUyMCVFRCU5NSU5OCVFQiU4MiU5OCVFQiVBNyU4QyUyMCVFQyU4MiVBQyVFQyU5QSVBOSVFRCU5NSU5OCVFQiU4QSU5NCUyMCVFQSVCMiVCRCVFQyU5QSVCMCVFQyU5NyU5MCVFQiU4QSU5NCUyMHJhbmslMjAwJUVCJUE3JThDJTIwJUVBJUIwJTk2JUVBJUIyJThDJTIwJUVCJTkwJUE5JUVCJThCJTg4JUVCJThCJUE0LiUwQXJhbmslMjAlM0QlMjB0b3JjaC5kaXN0cmlidXRlZC5nZXRfcmFuaygpJTBBaWYlMjByYW5rJTIwJTNEJTNEJTIwMCUzQSUwQSUyMCUyMCUyMCUyMHRleHRfaW4lMjAlM0QlMjAlMjJJcyUyMHRoaXMlMjByZXZpZXclMjBwb3NpdGl2ZSUyMG9yJTIwbmVnYXRpdmUlM0YlMjBSZXZpZXclM0ElMjB0aGlzJTIwaXMlMjB0aGUlMjBiZXN0JTIwY2FzdCUyMGlyb24lMjBza2lsbGV0JTIweW91JTIwd2lsbCUyMGV2ZXIlMjBidXklMjIlMEFlbGlmJTIwcmFuayUyMCUzRCUzRCUyMDElM0ElMEElMjAlMjAlMjAlMjB0ZXh0X2luJTIwJTNEJTIwJTIySXMlMjB0aGlzJTIwcmV2aWV3JTIwcG9zaXRpdmUlMjBvciUyMG5lZ2F0aXZlJTNGJTIwUmV2aWV3JTNBJTIwdGhpcyUyMGlzJTIwdGhlJTIwd29yc3QlMjByZXN0YXVyYW50JTIwZXZlciUyMiUwQSUwQXRva2VuaXplciUyMCUzRCUyMEF1dG9Ub2tlbml6ZXIuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX25hbWUpJTBBaW5wdXRzJTIwJTNEJTIwdG9rZW5pemVyLmVuY29kZSh0ZXh0X2luJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMikudG8oZGV2aWNlJTNEbG9jYWxfcmFuayklMEF3aXRoJTIwdG9yY2gubm9fZ3JhZCgpJTNBJTBBJTIwJTIwJTIwJTIwb3V0cHV0cyUyMCUzRCUyMGRzX2VuZ2luZS5tb2R1bGUuZ2VuZXJhdGUoaW5wdXRzJTJDJTIwc3luY2VkX2dwdXMlM0RUcnVlKSUwQXRleHRfb3V0JTIwJTNEJTIwdG9rZW5pemVyLmRlY29kZShvdXRwdXRzJTVCMCU1RCUyQyUyMHNraXBfc3BlY2lhbF90b2tlbnMlM0RUcnVlKSUwQXByaW50KGYlMjJyYW5rJTdCcmFuayU3RCUzQSU1Q24lMjAlMjAlMjBpbiUzRCU3QnRleHRfaW4lN0QlNUNuJTIwJTIwb3V0JTNEJTdCdGV4dF9vdXQlN0QlMjIp",highlighted:`<span class="hljs-comment">#!/usr/bin/env python</span> | |
| <span class="hljs-comment"># 이 스크립트는 단일 GPU에 모델을 맞출 수 없을 때 추론 모드에서 Deepspeed ZeRO를 사용하는 방법을 보여줍니다.</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># 1. CPU 오프로드와 함께 1개의 GPU 사용</span> | |
| <span class="hljs-comment"># 2. 또는 여러 GPU 사용</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># 먼저 deepspeed를 설치해야 합니다: pip install deepspeed</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># 여기서는 약 15GB의 GPU RAM이 필요한 3B "bigscience/T0_3B" 모델을 사용합니다 - 따라서 1개의 큰 GPU나 2개의</span> | |
| <span class="hljs-comment"># 작은 GPU로 처리할 수 있습니다. 또는 1개의 작은 GPU와 많은 CPU 메모리로도 가능합니다.</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># 약 50GB가 필요한 "bigscience/T0"와 같은 더 큰 모델을 사용하려면, 80GB GPU가 없는 한</span> | |
| <span class="hljs-comment"># 2-4개의 GPU가 필요할 것입니다. 그리고 여러 입력을 한 번에 처리하고 싶다면</span> | |
| <span class="hljs-comment"># 스크립트를 수정하여 더 많은 GPU를 처리할 수 있습니다.</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># 제공된 deepspeed 설정은 CPU 메모리 오프로딩도 활성화하므로, 사용 가능한 CPU 메모리가 많고</span> | |
| <span class="hljs-comment"># 속도 저하를 감수할 수 있다면 일반적으로 단일 GPU에 맞지 않는 모델을 로드할 수 있을 것입니다.</span> | |
| <span class="hljs-comment"># GPU 메모리가 충분하다면 CPU로의 오프로드를 원하지 않을 때 프로그램이 더 빠르게 실행될 것입니다 - 그럴 때는 해당 섹션을 비활성화하세요.</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># 1개의 GPU에 배포하려면:</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># deepspeed --num_gpus 1 t0.py</span> | |
| <span class="hljs-comment"># 또는:</span> | |
| <span class="hljs-comment"># python -m torch.distributed.run --nproc_per_node=1 t0.py</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># 2개의 GPU에 배포하려면:</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># deepspeed --num_gpus 2 t0.py</span> | |
| <span class="hljs-comment"># 또는:</span> | |
| <span class="hljs-comment"># python -m torch.distributed.run --nproc_per_node=2 t0.py</span> | |
| <span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoConfig, AutoModelForSeq2SeqLM | |
| <span class="hljs-keyword">from</span> transformers.integrations <span class="hljs-keyword">import</span> HfDeepSpeedConfig | |
| <span class="hljs-keyword">import</span> deepspeed | |
| <span class="hljs-keyword">import</span> os | |
| <span class="hljs-keyword">import</span> torch | |
| os.environ[<span class="hljs-string">"TOKENIZERS_PARALLELISM"</span>] = <span class="hljs-string">"false"</span> <span class="hljs-comment"># 토크나이저의 병렬 처리에 관한 경고를 피하기 위함입니다.</span> | |
| <span class="hljs-comment"># 분산 환경 설정</span> | |
| local_rank = <span class="hljs-built_in">int</span>(os.getenv(<span class="hljs-string">"LOCAL_RANK"</span>, <span class="hljs-string">"0"</span>)) | |
| world_size = <span class="hljs-built_in">int</span>(os.getenv(<span class="hljs-string">"WORLD_SIZE"</span>, <span class="hljs-string">"1"</span>)) | |
| torch.cuda.set_device(local_rank) | |
| deepspeed.init_distributed() | |
| model_name = <span class="hljs-string">"bigscience/T0_3B"</span> | |
| config = AutoConfig.from_pretrained(model_name) | |
| model_hidden_size = config.d_model | |
| <span class="hljs-comment"># 배치 크기는 world_size로 나누어 떨어져야 하지만, world_size보다 클 수 있습니다</span> | |
| train_batch_size = <span class="hljs-number">1</span> * world_size | |
| <span class="hljs-comment"># ds_config 참고사항</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># - Ampere 이상의 GPU를 사용하는 경우 bf16을 활성화하세요 - 이는 혼합 정밀도로 실행되어</span> | |
| <span class="hljs-comment"># 더 빠를 것입니다.</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># - 오래된 GPU의 경우 fp16을 활성화할 수 있지만, bf16으로 사전 훈련되지 않은 모델에서만 작동합니다 - 예를 들어</span> | |
| <span class="hljs-comment"># 모든 공식 t5 모델은 bf16으로 사전 훈련되었습니다</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># - CPU 오프로드를 원하지 않는다면 offload_param.device를 "none"으로 설정하거나 \`offload_param\` 섹션을</span> | |
| <span class="hljs-comment"># 완전히 제거하세요</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># - \`offload_param\`을 사용하는 경우, stage3_param_persistence_threshold를 수동으로 미세 조정하여</span> | |
| <span class="hljs-comment"># 어떤 매개변수가 GPU에 남아있어야 하는지 제어할 수 있습니다 - 값이 클수록 오프로드 크기가 작아집니다</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># Deepspeed 설정에 대한 자세한 정보는 다음을 참조하세요</span> | |
| <span class="hljs-comment"># https://huggingface.co/docs/transformers/main/main_classes/deepspeed</span> | |
| <span class="hljs-comment"># 일관성을 위해 json과 동일한 형식을 유지하되, true/false에는 소문자를 사용합니다</span> | |
| <span class="hljs-comment"># fmt: off</span> | |
| ds_config = { | |
| <span class="hljs-string">"fp16"</span>: { | |
| <span class="hljs-string">"enabled"</span>: <span class="hljs-literal">False</span> | |
| }, | |
| <span class="hljs-string">"bf16"</span>: { | |
| <span class="hljs-string">"enabled"</span>: <span class="hljs-literal">False</span> | |
| }, | |
| <span class="hljs-string">"zero_optimization"</span>: { | |
| <span class="hljs-string">"stage"</span>: <span class="hljs-number">3</span>, | |
| <span class="hljs-string">"offload_param"</span>: { | |
| <span class="hljs-string">"device"</span>: <span class="hljs-string">"cpu"</span>, | |
| <span class="hljs-string">"pin_memory"</span>: <span class="hljs-literal">True</span> | |
| }, | |
| <span class="hljs-string">"overlap_comm"</span>: <span class="hljs-literal">True</span>, | |
| <span class="hljs-string">"contiguous_gradients"</span>: <span class="hljs-literal">True</span>, | |
| <span class="hljs-string">"reduce_bucket_size"</span>: model_hidden_size * model_hidden_size, | |
| <span class="hljs-string">"stage3_prefetch_bucket_size"</span>: <span class="hljs-number">0.9</span> * model_hidden_size * model_hidden_size, | |
| <span class="hljs-string">"stage3_param_persistence_threshold"</span>: <span class="hljs-number">10</span> * model_hidden_size | |
| }, | |
| <span class="hljs-string">"steps_per_print"</span>: <span class="hljs-number">2000</span>, | |
| <span class="hljs-string">"train_batch_size"</span>: train_batch_size, | |
| <span class="hljs-string">"train_micro_batch_size_per_gpu"</span>: <span class="hljs-number">1</span>, | |
| <span class="hljs-string">"wall_clock_breakdown"</span>: <span class="hljs-literal">False</span> | |
| } | |
| <span class="hljs-comment"># fmt: on</span> | |
| <span class="hljs-comment"># 다음 줄은 모델의 \`from_pretrained\` 메소드가 호출될 때</span> | |
| <span class="hljs-comment"># deepspeed.zero.Init를 사용하여 모델을 여러 GPU에 직접 분할하도록 transformers에 지시합니다.</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># **이는 AutoModelForSeq2SeqLM.from_pretrained(model_name)로 모델을 로드하기 전에 실행되어야 합니다**</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># 그렇지 않으면 모델이 먼저 정상적으로 로드된 후 포워드 시에만 분할되는데, 이는</span> | |
| <span class="hljs-comment"># 덜 효율적이며 CPU RAM이 부족할 경우 실패할 수 있습니다</span> | |
| dschf = HfDeepSpeedConfig(ds_config) <span class="hljs-comment"># 이 객체를 유지하세요</span> | |
| <span class="hljs-comment"># 이제 모델을 로드할 수 있습니다.</span> | |
| model = AutoModelForSeq2SeqLM.from_pretrained(model_name) | |
| <span class="hljs-comment"># Deepspeed ZeRO를 초기화하고 엔진 객체만 저장</span> | |
| ds_engine = deepspeed.initialize(model=model, config_params=ds_config)[<span class="hljs-number">0</span>] | |
| ds_engine.module.<span class="hljs-built_in">eval</span>() <span class="hljs-comment"># inference</span> | |
| <span class="hljs-comment"># Deepspeed ZeRO는 각 GPU에서 서로 관련 없는 입력을 처리할 수 있습니다. 따라서 2개의 GPU를 사용하면 한 번에 2개의 입력을 처리할 수 있습니다.</span> | |
| <span class="hljs-comment"># GPU를 더 많이 사용하는 경우 그에 맞게 조정하세요.</span> | |
| <span class="hljs-comment"># 물론 처리할 입력이 하나뿐이라면 두 GPU에 동일한 문자열을 전달해야 합니다.</span> | |
| <span class="hljs-comment"># GPU를 하나만 사용하는 경우에는 rank 0만 갖게 됩니다.</span> | |
| rank = torch.distributed.get_rank() | |
| <span class="hljs-keyword">if</span> rank == <span class="hljs-number">0</span>: | |
| text_in = <span class="hljs-string">"Is this review positive or negative? Review: this is the best cast iron skillet you will ever buy"</span> | |
| <span class="hljs-keyword">elif</span> rank == <span class="hljs-number">1</span>: | |
| text_in = <span class="hljs-string">"Is this review positive or negative? Review: this is the worst restaurant ever"</span> | |
| tokenizer = AutoTokenizer.from_pretrained(model_name) | |
| inputs = tokenizer.encode(text_in, return_tensors=<span class="hljs-string">"pt"</span>).to(device=local_rank) | |
| <span class="hljs-keyword">with</span> torch.no_grad(): | |
| outputs = ds_engine.module.generate(inputs, synced_gpus=<span class="hljs-literal">True</span>) | |
| text_out = tokenizer.decode(outputs[<span class="hljs-number">0</span>], skip_special_tokens=<span class="hljs-literal">True</span>) | |
| <span class="hljs-built_in">print</span>(<span class="hljs-string">f"rank<span class="hljs-subst">{rank}</span>:\\n in=<span class="hljs-subst">{text_in}</span>\\n out=<span class="hljs-subst">{text_out}</span>"</span>)`,wrap:!1}}),Ke=new g({props:{code:"JTI0JTIwZGVlcHNwZWVkJTIwLS1udW1fZ3B1cyUyMDIlMjB0MC5weSUwQXJhbmswJTNBJTBBJTIwJTIwJTIwaW4lM0RJcyUyMHRoaXMlMjByZXZpZXclMjBwb3NpdGl2ZSUyMG9yJTIwbmVnYXRpdmUlM0YlMjBSZXZpZXclM0ElMjB0aGlzJTIwaXMlMjB0aGUlMjBiZXN0JTIwY2FzdCUyMGlyb24lMjBza2lsbGV0JTIweW91JTIwd2lsbCUyMGV2ZXIlMjBidXklMEElMjAlMjBvdXQlM0RQb3NpdGl2ZSUwQXJhbmsxJTNBJTBBJTIwJTIwJTIwaW4lM0RJcyUyMHRoaXMlMjByZXZpZXclMjBwb3NpdGl2ZSUyMG9yJTIwbmVnYXRpdmUlM0YlMjBSZXZpZXclM0ElMjB0aGlzJTIwaXMlMjB0aGUlMjB3b3JzdCUyMHJlc3RhdXJhbnQlMjBldmVyJTBBJTIwJTIwb3V0JTNEbmVnYXRpdmU=",highlighted:`$ deepspeed --num_gpus 2 t0.py | |
| rank0: | |
| <span class="hljs-keyword">in</span>=Is this review positive or negative? Review: this is the best cast iron skillet you will ever buy | |
| out=Positive | |
| rank1: | |
| <span class="hljs-keyword">in</span>=Is this review positive or negative? Review: this is the worst restaurant ever | |
| out=negative`,wrap:!1}}),et=new k({props:{title:"생성",local:"generate",headingTag:"h3"}}),Mt=new k({props:{title:"트러블슈팅",local:"troubleshoot",headingTag:"h2"}}),pt=new g({props:{code:"cHl0aG9uJTIwLWMlMjAnaW1wb3J0JTIwdG9yY2glM0IlMjBwcmludChmJTIydG9yY2glM0ElMjAlN0J0b3JjaC5fX3ZlcnNpb25fXyU3RCUyMiknJTBBcHl0aG9uJTIwLWMlMjAnaW1wb3J0JTIwdHJhbnNmb3JtZXJzJTNCJTIwcHJpbnQoZiUyMnRyYW5zZm9ybWVycyUzQSUyMCU3QnRyYW5zZm9ybWVycy5fX3ZlcnNpb25fXyU3RCUyMiknJTBBcHl0aG9uJTIwLWMlMjAnaW1wb3J0JTIwZGVlcHNwZWVkJTNCJTIwcHJpbnQoZiUyMmRlZXBzcGVlZCUzQSUyMCU3QmRlZXBzcGVlZC5fX3ZlcnNpb25fXyU3RCUyMikn",highlighted:`python -c <span class="hljs-string">'import torch; print(f"torch: {torch.__version__}")'</span> | |
| python -c <span class="hljs-string">'import transformers; print(f"transformers: {transformers.__version__}")'</span> | |
| python -c <span class="hljs-string">'import deepspeed; print(f"deepspeed: {deepspeed.__version__}")'</span>`,wrap:!1}}),ot=new k({props:{title:"DeepSpeed 프로세스가 시작 단계에서 종료되었을 경우",local:"deepspeed-process-killed-at-startup",headingTag:"h3"}}),wt=new k({props:{title:"NaN 손실",local:"nan-loss",headingTag:"h3"}}),ct=new g({props:{code:"JTdCJTBBJTIwJTIwJTIwJTIwJTIyZnAxNiUyMiUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMmVuYWJsZWQlMjIlM0ElMjAlMjJhdXRvJTIyJTJDJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIybG9zc19zY2FsZSUyMiUzQSUyMDAlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJsb3NzX3NjYWxlX3dpbmRvdyUyMiUzQSUyMDEwMDAlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJpbml0aWFsX3NjYWxlX3Bvd2VyJTIyJTNBJTIwMTYlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjJoeXN0ZXJlc2lzJTIyJTNBJTIwMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMm1pbl9sb3NzX3NjYWxlJTIyJTNBJTIwMSUwQSUyMCUyMCUyMCUyMCU3RCUwQSU3RA==",highlighted:`{ | |
| <span class="hljs-attr">"fp16":</span> { | |
| <span class="hljs-attr">"enabled":</span> <span class="hljs-string">"auto"</span>, | |
| <span class="hljs-attr">"loss_scale":</span> <span class="hljs-number">0</span>, | |
| <span class="hljs-attr">"loss_scale_window":</span> <span class="hljs-number">1000</span>, | |
| <span class="hljs-attr">"initial_scale_power":</span> <span class="hljs-number">16</span>, | |
| <span class="hljs-attr">"hysteresis":</span> <span class="hljs-number">2</span>, | |
| <span class="hljs-attr">"min_loss_scale":</span> <span class="hljs-number">1</span> | |
| } | |
| }`,wrap:!1}}),mt=new g({props:{code:"MCUyNSU3QyUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QyUyMDAlMkYxODklMjAlNUIwMCUzQTAwJTNDJTNGJTJDJTIwJTNGaXQlMkZzJTVEJTBBJTIwJTVCZGVlcHNjYWxlJTVEJTIwT1ZFUkZMT1chJTIwUmFuayUyMDAlMjBTa2lwcGluZyUyMHN0ZXAuJTIwQXR0ZW1wdGVkJTIwbG9zcyUyMHNjYWxlJTNBJTIwMjYyMTQ0JTJDJTIwcmVkdWNpbmclMjB0byUyMDI2MjE0NCUwQSUyMCUyMDElMjUlN0MlRTIlOTYlOEMlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0MlMjAxJTJGMTg5JTIwJTVCMDAlM0EwMCUzQzAxJTNBMjYlMkMlMjAlMjAyLjE3aXQlMkZzJTVEJTBBJTIwJTVCZGVlcHNjYWxlJTVEJTIwT1ZFUkZMT1chJTIwUmFuayUyMDAlMjBTa2lwcGluZyUyMHN0ZXAuJTIwQXR0ZW1wdGVkJTIwbG9zcyUyMHNjYWxlJTNBJTIwMjYyMTQ0JTJDJTIwcmVkdWNpbmclMjB0byUyMDEzMTA3Mi4wJTBBJTIwJTIwMSUyNSU3QyVFMiU5NiU4OCVFMiU5NiU4RiUwQSUyMCU1Qi4uLiU1RCUwQSUyMCU1QmRlZXBzY2FsZSU1RCUyME9WRVJGTE9XISUyMFJhbmslMjAwJTIwU2tpcHBpbmclMjBzdGVwLiUyMEF0dGVtcHRlZCUyMGxvc3MlMjBzY2FsZSUzQSUyMDElMkMlMjByZWR1Y2luZyUyMHRvJTIwMSUwQSUyMDE0JTI1JTdDJUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JTg4JUUyJTk2JThDJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTdDJTIwMjclMkYxODklMjAlNUIwMCUzQTE0JTNDMDElM0ExMyUyQyUyMCUyMDIuMjFpdCUyRnMlNUQlMEElMjAlNUJkZWVwc2NhbGUlNUQlMjBPVkVSRkxPVyElMjBSYW5rJTIwMCUyMFNraXBwaW5nJTIwc3RlcC4lMjBBdHRlbXB0ZWQlMjBsb3NzJTIwc2NhbGUlM0ElMjAxJTJDJTIwcmVkdWNpbmclMjB0byUyMDElMEElMjAxNSUyNSU3QyVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4OCVFMiU5NiU4RiUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCU3QyUyMDI4JTJGMTg5JTIwJTVCMDAlM0ExNCUzQzAxJTNBMTMlMkMlMjAlMjAyLjE4aXQlMkZzJTVEJTBBJTIwJTVCZGVlcHNjYWxlJTVEJTIwT1ZFUkZMT1chJTIwUmFuayUyMDAlMjBTa2lwcGluZyUyMHN0ZXAuJTIwQXR0ZW1wdGVkJTIwbG9zcyUyMHNjYWxlJTNBJTIwMSUyQyUyMHJlZHVjaW5nJTIwdG8lMjAxJTBBJTIwMTUlMjUlN0MlRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlODglRTIlOTYlOEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlN0MlMjAyOSUyRjE4OSUyMCU1QjAwJTNBMTUlM0MwMSUzQTEzJTJDJTIwJTIwMi4xOGl0JTJGcyU1RCUwQSUyMCU1QmRlZXBzY2FsZSU1RCUyME9WRVJGTE9XISUyMFJhbmslMjAwJTIwU2tpcHBpbmclMjBzdGVwLiUyMEF0dGVtcHRlZCUyMGxvc3MlMjBzY2FsZSUzQSUyMDElMkMlMjByZWR1Y2luZyUyMHRvJTIwMSUwQSU1Qi4uLiU1RA==",highlighted:`0%| | 0/189 [00:00<?, ?it/s] | |
| [deepscale] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 262144, reducing to 262144 | |
| 1%|▌ | 1/189 [00:00<01:26, 2.17it/s] | |
| [deepscale] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 262144, reducing to 131072.0 | |
| 1%|█▏ | |
| [...] | |
| [deepscale] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 1, reducing to 1 | |
| 14%|████████████████▌ | 27/189 [00:14<01:13, 2.21it/s] | |
| [deepscale] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 1, reducing to 1 | |
| 15%|█████████████████▏ | 28/189 [00:14<01:13, 2.18it/s] | |
| [deepscale] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 1, reducing to 1 | |
| 15%|█████████████████▊ | 29/189 [00:15<01:13, 2.18it/s] | |
| [deepscale] OVERFLOW! Rank 0 Skipping step. Attempted loss scale: 1, reducing to 1 | |
| [...]`,wrap:!1}}),ft=new k({props:{title:"리소스",local:"resources",headingTag:"h2"}}),$t=new Zn({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/ko/deepspeed.md"}}),{c(){s=o("meta"),i=a(),U=o("p"),p=a(),r(n.$$.fragment),J=a(),y=o("p"),y.innerHTML=Q,f=a(),R=o("ul"),R.innerHTML=E,b=a(),Z=o("p"),Z.innerHTML=w,B=a(),S=o("p"),S.innerHTML=O,_=a(),r(h.$$.fragment),D=a(),G=o("p"),G.innerHTML=Y,L=a(),r(H.$$.fragment),X=a(),r(K.$$.fragment),ll=a(),r(q.$$.fragment),el=a(),v=o("p"),v.innerHTML=P,$=a(),r(F.$$.fragment),sl=a(),tl=o("p"),tl.textContent=Ml,nl=a(),x=o("p"),x.textContent=ml,V=a(),r(N.$$.fragment),dl=a(),Ul=o("p"),Ul.textContent=HM,Rt=a(),fl=o("table"),fl.innerHTML=xM,Bt=a(),Vl=o("p"),Vl.textContent=YM,_t=a(),Ql=o("p"),Ql.textContent=LM,gt=a(),Al=o("ol"),Al.innerHTML=PM,bt=a(),r($l.$$.fragment),Et=a(),hl=o("p"),hl.innerHTML=KM,Zt=a(),r(Tl.$$.fragment),Nt=a(),Rl=o("p"),Rl.innerHTML=lU,Ft=a(),r(pl.$$.fragment),St=a(),r(Bl.$$.fragment),kt=a(),_l=o("p"),_l.textContent=eU,Ot=a(),gl=o("ol"),gl.innerHTML=tU,Dt=a(),bl=o("p"),bl.innerHTML=sU,Wt=a(),El=o("ol"),El.innerHTML=MU,qt=a(),Zl=o("p"),Zl.innerHTML=UU,vt=a(),r(Nl.$$.fragment),Gt=a(),Fl=o("p"),Fl.innerHTML=nU,Xt=a(),r(yl.$$.fragment),zt=a(),Sl=o("p"),Sl.innerHTML=JU,Ht=a(),r(il.$$.fragment),xt=a(),r(kl.$$.fragment),Yt=a(),Ol=o("p"),Ol.innerHTML=aU,Lt=a(),Dl=o("p"),Dl.innerHTML=TU,Pt=a(),Wl=o("p"),Wl.innerHTML=pU,Kt=a(),r(ql.$$.fragment),ls=a(),r(vl.$$.fragment),es=a(),Gl=o("p"),Gl.textContent=yU,ts=a(),r(Xl.$$.fragment),ss=a(),zl=o("p"),zl.textContent=iU,Ms=a(),Hl=o("ol"),Hl.innerHTML=oU,Us=a(),r(xl.$$.fragment),ns=a(),Yl=o("p"),Yl.innerHTML=jU,Js=a(),r(ol.$$.fragment),as=a(),r(jl.$$.fragment),Ts=a(),r(Ll.$$.fragment),ps=a(),Pl=o("p"),Pl.textContent=CU,ys=a(),r(Cl.$$.fragment),is=a(),r(Kl.$$.fragment),os=a(),le=o("p"),le.innerHTML=wU,js=a(),r(ee.$$.fragment),Cs=a(),r(te.$$.fragment),ws=a(),se=o("p"),se.innerHTML=rU,rs=a(),r(Me.$$.fragment),Is=a(),r(Ue.$$.fragment),cs=a(),ne=o("p"),ne.innerHTML=IU,us=a(),r(Je.$$.fragment),ms=a(),r(ae.$$.fragment),ds=a(),Te=o("p"),Te.textContent=cU,fs=a(),pe=o("p"),pe.textContent=uU,Vs=a(),ye=o("p"),ye.textContent=mU,Qs=a(),ie=o("p"),ie.innerHTML=dU,As=a(),r(oe.$$.fragment),$s=a(),r(je.$$.fragment),hs=a(),Ce=o("p"),Ce.innerHTML=fU,Rs=a(),we=o("p"),we.innerHTML=VU,Bs=a(),r(wl.$$.fragment),_s=a(),r(re.$$.fragment),gs=a(),Ie=o("p"),Ie.innerHTML=QU,bs=a(),ce=o("p"),ce.innerHTML=AU,Es=a(),r(ue.$$.fragment),Zs=a(),me=o("p"),me.innerHTML=$U,Ns=a(),r(rl.$$.fragment),Fs=a(),r(de.$$.fragment),Ss=a(),fe=o("p"),fe.textContent=hU,ks=a(),r(Ve.$$.fragment),Os=a(),Qe=o("p"),Qe.textContent=RU,Ds=a(),r(Ae.$$.fragment),Ws=a(),r($e.$$.fragment),qs=a(),he=o("p"),he.innerHTML=BU,vs=a(),r(Re.$$.fragment),Gs=a(),Be=o("p"),Be.textContent=_U,Xs=a(),r(_e.$$.fragment),zs=a(),ge=o("p"),ge.innerHTML=gU,Hs=a(),r(be.$$.fragment),xs=a(),Ee=o("p"),Ee.innerHTML=bU,Ys=a(),r(Ze.$$.fragment),Ls=a(),r(Ne.$$.fragment),Ps=a(),Fe=o("p"),Fe.innerHTML=EU,Ks=a(),r(Il.$$.fragment),lM=a(),r(Se.$$.fragment),eM=a(),ke=o("p"),ke.innerHTML=ZU,tM=a(),Oe=o("p"),Oe.innerHTML=NU,sM=a(),De=o("p"),De.innerHTML=FU,MM=a(),r(We.$$.fragment),UM=a(),r(qe.$$.fragment),nM=a(),ve=o("p"),ve.innerHTML=SU,JM=a(),r(cl.$$.fragment),aM=a(),Ge=o("p"),Ge.innerHTML=kU,TM=a(),r(ul.$$.fragment),pM=a(),r(Xe.$$.fragment),yM=a(),ze=o("p"),ze.innerHTML=OU,iM=a(),He=o("p"),He.textContent=DU,oM=a(),xe=o("ul"),xe.innerHTML=WU,jM=a(),Ye=o("p"),Ye.innerHTML=qU,CM=a(),r(Le.$$.fragment),wM=a(),Pe=o("p"),Pe.textContent=vU,rM=a(),r(Ke.$$.fragment),IM=a(),lt=o("p"),lt.textContent=GU,cM=a(),r(et.$$.fragment),uM=a(),tt=o("p"),tt.innerHTML=XU,mM=a(),st=o("p"),st.innerHTML=zU,dM=a(),r(Mt.$$.fragment),fM=a(),Ut=o("p"),Ut.innerHTML=HU,VM=a(),nt=o("p"),nt.textContent=xU,QM=a(),Jt=o("ul"),Jt.innerHTML=YU,AM=a(),at=o("p"),at.innerHTML=LU,$M=a(),Tt=o("ul"),Tt.innerHTML=PU,hM=a(),r(pt.$$.fragment),RM=a(),yt=o("ul"),yt.innerHTML=KU,BM=a(),it=o("p"),it.textContent=ln,_M=a(),r(ot.$$.fragment),gM=a(),jt=o("p"),jt.innerHTML=en,bM=a(),Ct=o("p"),Ct.innerHTML=tn,EM=a(),r(wt.$$.fragment),ZM=a(),rt=o("p"),rt.textContent=sn,NM=a(),It=o("p"),It.textContent=Mn,FM=a(),r(ct.$$.fragment),SM=a(),ut=o("p"),ut.innerHTML=Un,kM=a(),r(mt.$$.fragment),OM=a(),dt=o("p"),dt.innerHTML=nn,DM=a(),r(ft.$$.fragment),WM=a(),Vt=o("p"),Vt.innerHTML=Jn,qM=a(),Qt=o("p"),Qt.textContent=an,vM=a(),At=o("ul"),At.innerHTML=Tn,GM=a(),r($t.$$.fragment),XM=a(),ht=o("p"),this.h()},l(l){const M=_n("svelte-u9bgzb",document.head);s=j(M,"META",{name:!0,content:!0}),M.forEach(e),i=T(l),U=j(l,"P",{}),Qn(U).forEach(e),p=T(l),I(n.$$.fragment,l),J=T(l),y=j(l,"P",{"data-svelte-h":!0}),C(y)!=="svelte-eaoepn"&&(y.innerHTML=Q),f=T(l),R=j(l,"UL",{"data-svelte-h":!0}),C(R)!=="svelte-naazos"&&(R.innerHTML=E),b=T(l),Z=j(l,"P",{"data-svelte-h":!0}),C(Z)!=="svelte-17q94pv"&&(Z.innerHTML=w),B=T(l),S=j(l,"P",{"data-svelte-h":!0}),C(S)!=="svelte-j7debo"&&(S.innerHTML=O),_=T(l),I(h.$$.fragment,l),D=T(l),G=j(l,"P",{"data-svelte-h":!0}),C(G)!=="svelte-nq7j86"&&(G.innerHTML=Y),L=T(l),I(H.$$.fragment,l),X=T(l),I(K.$$.fragment,l),ll=T(l),I(q.$$.fragment,l),el=T(l),v=j(l,"P",{"data-svelte-h":!0}),C(v)!=="svelte-3o8idg"&&(v.innerHTML=P),$=T(l),I(F.$$.fragment,l),sl=T(l),tl=j(l,"P",{"data-svelte-h":!0}),C(tl)!=="svelte-8jkjyv"&&(tl.textContent=Ml),nl=T(l),x=j(l,"P",{"data-svelte-h":!0}),C(x)!=="svelte-1qfjn0l"&&(x.textContent=ml),V=T(l),I(N.$$.fragment,l),dl=T(l),Ul=j(l,"P",{"data-svelte-h":!0}),C(Ul)!=="svelte-3ef2n9"&&(Ul.textContent=HM),Rt=T(l),fl=j(l,"TABLE",{"data-svelte-h":!0}),C(fl)!=="svelte-1ukjioh"&&(fl.innerHTML=xM),Bt=T(l),Vl=j(l,"P",{"data-svelte-h":!0}),C(Vl)!=="svelte-4ugzxq"&&(Vl.textContent=YM),_t=T(l),Ql=j(l,"P",{"data-svelte-h":!0}),C(Ql)!=="svelte-jb44pv"&&(Ql.textContent=LM),gt=T(l),Al=j(l,"OL",{"data-svelte-h":!0}),C(Al)!=="svelte-9ezr99"&&(Al.innerHTML=PM),bt=T(l),I($l.$$.fragment,l),Et=T(l),hl=j(l,"P",{"data-svelte-h":!0}),C(hl)!=="svelte-xjp9mk"&&(hl.innerHTML=KM),Zt=T(l),I(Tl.$$.fragment,l),Nt=T(l),Rl=j(l,"P",{"data-svelte-h":!0}),C(Rl)!=="svelte-1j9y7bq"&&(Rl.innerHTML=lU),Ft=T(l),I(pl.$$.fragment,l),St=T(l),I(Bl.$$.fragment,l),kt=T(l),_l=j(l,"P",{"data-svelte-h":!0}),C(_l)!=="svelte-g3sr9r"&&(_l.textContent=eU),Ot=T(l),gl=j(l,"OL",{"data-svelte-h":!0}),C(gl)!=="svelte-11w97px"&&(gl.innerHTML=tU),Dt=T(l),bl=j(l,"P",{"data-svelte-h":!0}),C(bl)!=="svelte-1yskfjg"&&(bl.innerHTML=sU),Wt=T(l),El=j(l,"OL",{"data-svelte-h":!0}),C(El)!=="svelte-1bp1jzc"&&(El.innerHTML=MU),qt=T(l),Zl=j(l,"P",{"data-svelte-h":!0}),C(Zl)!=="svelte-a7t0kb"&&(Zl.innerHTML=UU),vt=T(l),I(Nl.$$.fragment,l),Gt=T(l),Fl=j(l,"P",{"data-svelte-h":!0}),C(Fl)!=="svelte-1q4ds8e"&&(Fl.innerHTML=nU),Xt=T(l),I(yl.$$.fragment,l),zt=T(l),Sl=j(l,"P",{"data-svelte-h":!0}),C(Sl)!=="svelte-7cq9wt"&&(Sl.innerHTML=JU),Ht=T(l),I(il.$$.fragment,l),xt=T(l),I(kl.$$.fragment,l),Yt=T(l),Ol=j(l,"P",{"data-svelte-h":!0}),C(Ol)!=="svelte-1i6wucc"&&(Ol.innerHTML=aU),Lt=T(l),Dl=j(l,"P",{"data-svelte-h":!0}),C(Dl)!=="svelte-nde1sh"&&(Dl.innerHTML=TU),Pt=T(l),Wl=j(l,"P",{"data-svelte-h":!0}),C(Wl)!=="svelte-pi99rv"&&(Wl.innerHTML=pU),Kt=T(l),I(ql.$$.fragment,l),ls=T(l),I(vl.$$.fragment,l),es=T(l),Gl=j(l,"P",{"data-svelte-h":!0}),C(Gl)!=="svelte-v7y32z"&&(Gl.textContent=yU),ts=T(l),I(Xl.$$.fragment,l),ss=T(l),zl=j(l,"P",{"data-svelte-h":!0}),C(zl)!=="svelte-oxu3it"&&(zl.textContent=iU),Ms=T(l),Hl=j(l,"OL",{"data-svelte-h":!0}),C(Hl)!=="svelte-1nb1u65"&&(Hl.innerHTML=oU),Us=T(l),I(xl.$$.fragment,l),ns=T(l),Yl=j(l,"P",{"data-svelte-h":!0}),C(Yl)!=="svelte-1435kea"&&(Yl.innerHTML=jU),Js=T(l),I(ol.$$.fragment,l),as=T(l),I(jl.$$.fragment,l),Ts=T(l),I(Ll.$$.fragment,l),ps=T(l),Pl=j(l,"P",{"data-svelte-h":!0}),C(Pl)!=="svelte-j8ngkc"&&(Pl.textContent=CU),ys=T(l),I(Cl.$$.fragment,l),is=T(l),I(Kl.$$.fragment,l),os=T(l),le=j(l,"P",{"data-svelte-h":!0}),C(le)!=="svelte-b5qvau"&&(le.innerHTML=wU),js=T(l),I(ee.$$.fragment,l),Cs=T(l),I(te.$$.fragment,l),ws=T(l),se=j(l,"P",{"data-svelte-h":!0}),C(se)!=="svelte-l6vz6d"&&(se.innerHTML=rU),rs=T(l),I(Me.$$.fragment,l),Is=T(l),I(Ue.$$.fragment,l),cs=T(l),ne=j(l,"P",{"data-svelte-h":!0}),C(ne)!=="svelte-azqlxb"&&(ne.innerHTML=IU),us=T(l),I(Je.$$.fragment,l),ms=T(l),I(ae.$$.fragment,l),ds=T(l),Te=j(l,"P",{"data-svelte-h":!0}),C(Te)!=="svelte-dpabvn"&&(Te.textContent=cU),fs=T(l),pe=j(l,"P",{"data-svelte-h":!0}),C(pe)!=="svelte-1686adi"&&(pe.textContent=uU),Vs=T(l),ye=j(l,"P",{"data-svelte-h":!0}),C(ye)!=="svelte-1adyxw4"&&(ye.textContent=mU),Qs=T(l),ie=j(l,"P",{"data-svelte-h":!0}),C(ie)!=="svelte-1rowry7"&&(ie.innerHTML=dU),As=T(l),I(oe.$$.fragment,l),$s=T(l),I(je.$$.fragment,l),hs=T(l),Ce=j(l,"P",{"data-svelte-h":!0}),C(Ce)!=="svelte-144yyml"&&(Ce.innerHTML=fU),Rs=T(l),we=j(l,"P",{"data-svelte-h":!0}),C(we)!=="svelte-ircvm2"&&(we.innerHTML=VU),Bs=T(l),I(wl.$$.fragment,l),_s=T(l),I(re.$$.fragment,l),gs=T(l),Ie=j(l,"P",{"data-svelte-h":!0}),C(Ie)!=="svelte-1bhljkl"&&(Ie.innerHTML=QU),bs=T(l),ce=j(l,"P",{"data-svelte-h":!0}),C(ce)!=="svelte-19uu04y"&&(ce.innerHTML=AU),Es=T(l),I(ue.$$.fragment,l),Zs=T(l),me=j(l,"P",{"data-svelte-h":!0}),C(me)!=="svelte-trc0q4"&&(me.innerHTML=$U),Ns=T(l),I(rl.$$.fragment,l),Fs=T(l),I(de.$$.fragment,l),Ss=T(l),fe=j(l,"P",{"data-svelte-h":!0}),C(fe)!=="svelte-rlqn6u"&&(fe.textContent=hU),ks=T(l),I(Ve.$$.fragment,l),Os=T(l),Qe=j(l,"P",{"data-svelte-h":!0}),C(Qe)!=="svelte-16exqvu"&&(Qe.textContent=RU),Ds=T(l),I(Ae.$$.fragment,l),Ws=T(l),I($e.$$.fragment,l),qs=T(l),he=j(l,"P",{"data-svelte-h":!0}),C(he)!=="svelte-1xsk9mv"&&(he.innerHTML=BU),vs=T(l),I(Re.$$.fragment,l),Gs=T(l),Be=j(l,"P",{"data-svelte-h":!0}),C(Be)!=="svelte-14qdmtn"&&(Be.textContent=_U),Xs=T(l),I(_e.$$.fragment,l),zs=T(l),ge=j(l,"P",{"data-svelte-h":!0}),C(ge)!=="svelte-a5v6xj"&&(ge.innerHTML=gU),Hs=T(l),I(be.$$.fragment,l),xs=T(l),Ee=j(l,"P",{"data-svelte-h":!0}),C(Ee)!=="svelte-qykc5c"&&(Ee.innerHTML=bU),Ys=T(l),I(Ze.$$.fragment,l),Ls=T(l),I(Ne.$$.fragment,l),Ps=T(l),Fe=j(l,"P",{"data-svelte-h":!0}),C(Fe)!=="svelte-3x8880"&&(Fe.innerHTML=EU),Ks=T(l),I(Il.$$.fragment,l),lM=T(l),I(Se.$$.fragment,l),eM=T(l),ke=j(l,"P",{"data-svelte-h":!0}),C(ke)!=="svelte-1tv6ab4"&&(ke.innerHTML=ZU),tM=T(l),Oe=j(l,"P",{"data-svelte-h":!0}),C(Oe)!=="svelte-1jb73uu"&&(Oe.innerHTML=NU),sM=T(l),De=j(l,"P",{"data-svelte-h":!0}),C(De)!=="svelte-i4o3my"&&(De.innerHTML=FU),MM=T(l),I(We.$$.fragment,l),UM=T(l),I(qe.$$.fragment,l),nM=T(l),ve=j(l,"P",{"data-svelte-h":!0}),C(ve)!=="svelte-1y01xkj"&&(ve.innerHTML=SU),JM=T(l),I(cl.$$.fragment,l),aM=T(l),Ge=j(l,"P",{"data-svelte-h":!0}),C(Ge)!=="svelte-cd2s2l"&&(Ge.innerHTML=kU),TM=T(l),I(ul.$$.fragment,l),pM=T(l),I(Xe.$$.fragment,l),yM=T(l),ze=j(l,"P",{"data-svelte-h":!0}),C(ze)!=="svelte-10hxzc"&&(ze.innerHTML=OU),iM=T(l),He=j(l,"P",{"data-svelte-h":!0}),C(He)!=="svelte-4ddn1l"&&(He.textContent=DU),oM=T(l),xe=j(l,"UL",{"data-svelte-h":!0}),C(xe)!=="svelte-1vk101s"&&(xe.innerHTML=WU),jM=T(l),Ye=j(l,"P",{"data-svelte-h":!0}),C(Ye)!=="svelte-wbpuhy"&&(Ye.innerHTML=qU),CM=T(l),I(Le.$$.fragment,l),wM=T(l),Pe=j(l,"P",{"data-svelte-h":!0}),C(Pe)!=="svelte-1lqs975"&&(Pe.textContent=vU),rM=T(l),I(Ke.$$.fragment,l),IM=T(l),lt=j(l,"P",{"data-svelte-h":!0}),C(lt)!=="svelte-1kkj86z"&&(lt.textContent=GU),cM=T(l),I(et.$$.fragment,l),uM=T(l),tt=j(l,"P",{"data-svelte-h":!0}),C(tt)!=="svelte-1a4l76t"&&(tt.innerHTML=XU),mM=T(l),st=j(l,"P",{"data-svelte-h":!0}),C(st)!=="svelte-pc4k6q"&&(st.innerHTML=zU),dM=T(l),I(Mt.$$.fragment,l),fM=T(l),Ut=j(l,"P",{"data-svelte-h":!0}),C(Ut)!=="svelte-13tr4mq"&&(Ut.innerHTML=HU),VM=T(l),nt=j(l,"P",{"data-svelte-h":!0}),C(nt)!=="svelte-1k5hfn8"&&(nt.textContent=xU),QM=T(l),Jt=j(l,"UL",{"data-svelte-h":!0}),C(Jt)!=="svelte-ar78mb"&&(Jt.innerHTML=YU),AM=T(l),at=j(l,"P",{"data-svelte-h":!0}),C(at)!=="svelte-1mjmuca"&&(at.innerHTML=LU),$M=T(l),Tt=j(l,"UL",{"data-svelte-h":!0}),C(Tt)!=="svelte-k10cds"&&(Tt.innerHTML=PU),hM=T(l),I(pt.$$.fragment,l),RM=T(l),yt=j(l,"UL",{"data-svelte-h":!0}),C(yt)!=="svelte-1v612"&&(yt.innerHTML=KU),BM=T(l),it=j(l,"P",{"data-svelte-h":!0}),C(it)!=="svelte-mhpkv9"&&(it.textContent=ln),_M=T(l),I(ot.$$.fragment,l),gM=T(l),jt=j(l,"P",{"data-svelte-h":!0}),C(jt)!=="svelte-1dp6ea1"&&(jt.innerHTML=en),bM=T(l),Ct=j(l,"P",{"data-svelte-h":!0}),C(Ct)!=="svelte-1tcm046"&&(Ct.innerHTML=tn),EM=T(l),I(wt.$$.fragment,l),ZM=T(l),rt=j(l,"P",{"data-svelte-h":!0}),C(rt)!=="svelte-1yf4ua2"&&(rt.textContent=sn),NM=T(l),It=j(l,"P",{"data-svelte-h":!0}),C(It)!=="svelte-1njbvfu"&&(It.textContent=Mn),FM=T(l),I(ct.$$.fragment,l),SM=T(l),ut=j(l,"P",{"data-svelte-h":!0}),C(ut)!=="svelte-ywaimq"&&(ut.innerHTML=Un),kM=T(l),I(mt.$$.fragment,l),OM=T(l),dt=j(l,"P",{"data-svelte-h":!0}),C(dt)!=="svelte-nno03d"&&(dt.innerHTML=nn),DM=T(l),I(ft.$$.fragment,l),WM=T(l),Vt=j(l,"P",{"data-svelte-h":!0}),C(Vt)!=="svelte-zgt8pc"&&(Vt.innerHTML=Jn),qM=T(l),Qt=j(l,"P",{"data-svelte-h":!0}),C(Qt)!=="svelte-3iitxe"&&(Qt.textContent=an),vM=T(l),At=j(l,"UL",{"data-svelte-h":!0}),C(At)!=="svelte-o0yfva"&&(At.innerHTML=Tn),GM=T(l),I($t.$$.fragment,l),XM=T(l),ht=j(l,"P",{}),Qn(ht).forEach(e),this.h()},h(){An(s,"name","hf:doc:metadata"),An(s,"content",mJ)},m(l,M){gn(document.head,s),t(l,i,M),t(l,U,M),t(l,p,M),c(n,l,M),t(l,J,M),t(l,y,M),t(l,f,M),t(l,R,M),t(l,b,M),t(l,Z,M),t(l,B,M),t(l,S,M),t(l,_,M),c(h,l,M),t(l,D,M),t(l,G,M),t(l,L,M),c(H,l,M),t(l,X,M),c(K,l,M),t(l,ll,M),c(q,l,M),t(l,el,M),t(l,v,M),t(l,$,M),c(F,l,M),t(l,sl,M),t(l,tl,M),t(l,nl,M),t(l,x,M),t(l,V,M),c(N,l,M),t(l,dl,M),t(l,Ul,M),t(l,Rt,M),t(l,fl,M),t(l,Bt,M),t(l,Vl,M),t(l,_t,M),t(l,Ql,M),t(l,gt,M),t(l,Al,M),t(l,bt,M),c($l,l,M),t(l,Et,M),t(l,hl,M),t(l,Zt,M),c(Tl,l,M),t(l,Nt,M),t(l,Rl,M),t(l,Ft,M),c(pl,l,M),t(l,St,M),c(Bl,l,M),t(l,kt,M),t(l,_l,M),t(l,Ot,M),t(l,gl,M),t(l,Dt,M),t(l,bl,M),t(l,Wt,M),t(l,El,M),t(l,qt,M),t(l,Zl,M),t(l,vt,M),c(Nl,l,M),t(l,Gt,M),t(l,Fl,M),t(l,Xt,M),c(yl,l,M),t(l,zt,M),t(l,Sl,M),t(l,Ht,M),c(il,l,M),t(l,xt,M),c(kl,l,M),t(l,Yt,M),t(l,Ol,M),t(l,Lt,M),t(l,Dl,M),t(l,Pt,M),t(l,Wl,M),t(l,Kt,M),c(ql,l,M),t(l,ls,M),c(vl,l,M),t(l,es,M),t(l,Gl,M),t(l,ts,M),c(Xl,l,M),t(l,ss,M),t(l,zl,M),t(l,Ms,M),t(l,Hl,M),t(l,Us,M),c(xl,l,M),t(l,ns,M),t(l,Yl,M),t(l,Js,M),c(ol,l,M),t(l,as,M),c(jl,l,M),t(l,Ts,M),c(Ll,l,M),t(l,ps,M),t(l,Pl,M),t(l,ys,M),c(Cl,l,M),t(l,is,M),c(Kl,l,M),t(l,os,M),t(l,le,M),t(l,js,M),c(ee,l,M),t(l,Cs,M),c(te,l,M),t(l,ws,M),t(l,se,M),t(l,rs,M),c(Me,l,M),t(l,Is,M),c(Ue,l,M),t(l,cs,M),t(l,ne,M),t(l,us,M),c(Je,l,M),t(l,ms,M),c(ae,l,M),t(l,ds,M),t(l,Te,M),t(l,fs,M),t(l,pe,M),t(l,Vs,M),t(l,ye,M),t(l,Qs,M),t(l,ie,M),t(l,As,M),c(oe,l,M),t(l,$s,M),c(je,l,M),t(l,hs,M),t(l,Ce,M),t(l,Rs,M),t(l,we,M),t(l,Bs,M),c(wl,l,M),t(l,_s,M),c(re,l,M),t(l,gs,M),t(l,Ie,M),t(l,bs,M),t(l,ce,M),t(l,Es,M),c(ue,l,M),t(l,Zs,M),t(l,me,M),t(l,Ns,M),c(rl,l,M),t(l,Fs,M),c(de,l,M),t(l,Ss,M),t(l,fe,M),t(l,ks,M),c(Ve,l,M),t(l,Os,M),t(l,Qe,M),t(l,Ds,M),c(Ae,l,M),t(l,Ws,M),c($e,l,M),t(l,qs,M),t(l,he,M),t(l,vs,M),c(Re,l,M),t(l,Gs,M),t(l,Be,M),t(l,Xs,M),c(_e,l,M),t(l,zs,M),t(l,ge,M),t(l,Hs,M),c(be,l,M),t(l,xs,M),t(l,Ee,M),t(l,Ys,M),c(Ze,l,M),t(l,Ls,M),c(Ne,l,M),t(l,Ps,M),t(l,Fe,M),t(l,Ks,M),c(Il,l,M),t(l,lM,M),c(Se,l,M),t(l,eM,M),t(l,ke,M),t(l,tM,M),t(l,Oe,M),t(l,sM,M),t(l,De,M),t(l,MM,M),c(We,l,M),t(l,UM,M),c(qe,l,M),t(l,nM,M),t(l,ve,M),t(l,JM,M),c(cl,l,M),t(l,aM,M),t(l,Ge,M),t(l,TM,M),c(ul,l,M),t(l,pM,M),c(Xe,l,M),t(l,yM,M),t(l,ze,M),t(l,iM,M),t(l,He,M),t(l,oM,M),t(l,xe,M),t(l,jM,M),t(l,Ye,M),t(l,CM,M),c(Le,l,M),t(l,wM,M),t(l,Pe,M),t(l,rM,M),c(Ke,l,M),t(l,IM,M),t(l,lt,M),t(l,cM,M),c(et,l,M),t(l,uM,M),t(l,tt,M),t(l,mM,M),t(l,st,M),t(l,dM,M),c(Mt,l,M),t(l,fM,M),t(l,Ut,M),t(l,VM,M),t(l,nt,M),t(l,QM,M),t(l,Jt,M),t(l,AM,M),t(l,at,M),t(l,$M,M),t(l,Tt,M),t(l,hM,M),c(pt,l,M),t(l,RM,M),t(l,yt,M),t(l,BM,M),t(l,it,M),t(l,_M,M),c(ot,l,M),t(l,gM,M),t(l,jt,M),t(l,bM,M),t(l,Ct,M),t(l,EM,M),c(wt,l,M),t(l,ZM,M),t(l,rt,M),t(l,NM,M),t(l,It,M),t(l,FM,M),c(ct,l,M),t(l,SM,M),t(l,ut,M),t(l,kM,M),c(mt,l,M),t(l,OM,M),t(l,dt,M),t(l,DM,M),c(ft,l,M),t(l,WM,M),t(l,Vt,M),t(l,qM,M),t(l,Qt,M),t(l,vM,M),t(l,At,M),t(l,GM,M),c($t,l,M),t(l,XM,M),t(l,ht,M),zM=!0},p(l,[M]){const pn={};M&2&&(pn.$$scope={dirty:M,ctx:l}),H.$set(pn);const yn={};M&2&&(yn.$$scope={dirty:M,ctx:l}),K.$set(yn);const on={};M&2&&(on.$$scope={dirty:M,ctx:l}),Tl.$set(on);const jn={};M&2&&(jn.$$scope={dirty:M,ctx:l}),pl.$set(jn);const Cn={};M&2&&(Cn.$$scope={dirty:M,ctx:l}),yl.$set(Cn);const wn={};M&2&&(wn.$$scope={dirty:M,ctx:l}),il.$set(wn);const rn={};M&2&&(rn.$$scope={dirty:M,ctx:l}),ol.$set(rn);const In={};M&2&&(In.$$scope={dirty:M,ctx:l}),jl.$set(In);const cn={};M&2&&(cn.$$scope={dirty:M,ctx:l}),Cl.$set(cn);const un={};M&2&&(un.$$scope={dirty:M,ctx:l}),wl.$set(un);const mn={};M&2&&(mn.$$scope={dirty:M,ctx:l}),rl.$set(mn);const dn={};M&2&&(dn.$$scope={dirty:M,ctx:l}),Il.$set(dn);const fn={};M&2&&(fn.$$scope={dirty:M,ctx:l}),cl.$set(fn);const Vn={};M&2&&(Vn.$$scope={dirty:M,ctx:l}),ul.$set(Vn)},i(l){zM||(u(n.$$.fragment,l),u(h.$$.fragment,l),u(H.$$.fragment,l),u(K.$$.fragment,l),u(q.$$.fragment,l),u(F.$$.fragment,l),u(N.$$.fragment,l),u($l.$$.fragment,l),u(Tl.$$.fragment,l),u(pl.$$.fragment,l),u(Bl.$$.fragment,l),u(Nl.$$.fragment,l),u(yl.$$.fragment,l),u(il.$$.fragment,l),u(kl.$$.fragment,l),u(ql.$$.fragment,l),u(vl.$$.fragment,l),u(Xl.$$.fragment,l),u(xl.$$.fragment,l),u(ol.$$.fragment,l),u(jl.$$.fragment,l),u(Ll.$$.fragment,l),u(Cl.$$.fragment,l),u(Kl.$$.fragment,l),u(ee.$$.fragment,l),u(te.$$.fragment,l),u(Me.$$.fragment,l),u(Ue.$$.fragment,l),u(Je.$$.fragment,l),u(ae.$$.fragment,l),u(oe.$$.fragment,l),u(je.$$.fragment,l),u(wl.$$.fragment,l),u(re.$$.fragment,l),u(ue.$$.fragment,l),u(rl.$$.fragment,l),u(de.$$.fragment,l),u(Ve.$$.fragment,l),u(Ae.$$.fragment,l),u($e.$$.fragment,l),u(Re.$$.fragment,l),u(_e.$$.fragment,l),u(be.$$.fragment,l),u(Ze.$$.fragment,l),u(Ne.$$.fragment,l),u(Il.$$.fragment,l),u(Se.$$.fragment,l),u(We.$$.fragment,l),u(qe.$$.fragment,l),u(cl.$$.fragment,l),u(ul.$$.fragment,l),u(Xe.$$.fragment,l),u(Le.$$.fragment,l),u(Ke.$$.fragment,l),u(et.$$.fragment,l),u(Mt.$$.fragment,l),u(pt.$$.fragment,l),u(ot.$$.fragment,l),u(wt.$$.fragment,l),u(ct.$$.fragment,l),u(mt.$$.fragment,l),u(ft.$$.fragment,l),u($t.$$.fragment,l),zM=!0)},o(l){m(n.$$.fragment,l),m(h.$$.fragment,l),m(H.$$.fragment,l),m(K.$$.fragment,l),m(q.$$.fragment,l),m(F.$$.fragment,l),m(N.$$.fragment,l),m($l.$$.fragment,l),m(Tl.$$.fragment,l),m(pl.$$.fragment,l),m(Bl.$$.fragment,l),m(Nl.$$.fragment,l),m(yl.$$.fragment,l),m(il.$$.fragment,l),m(kl.$$.fragment,l),m(ql.$$.fragment,l),m(vl.$$.fragment,l),m(Xl.$$.fragment,l),m(xl.$$.fragment,l),m(ol.$$.fragment,l),m(jl.$$.fragment,l),m(Ll.$$.fragment,l),m(Cl.$$.fragment,l),m(Kl.$$.fragment,l),m(ee.$$.fragment,l),m(te.$$.fragment,l),m(Me.$$.fragment,l),m(Ue.$$.fragment,l),m(Je.$$.fragment,l),m(ae.$$.fragment,l),m(oe.$$.fragment,l),m(je.$$.fragment,l),m(wl.$$.fragment,l),m(re.$$.fragment,l),m(ue.$$.fragment,l),m(rl.$$.fragment,l),m(de.$$.fragment,l),m(Ve.$$.fragment,l),m(Ae.$$.fragment,l),m($e.$$.fragment,l),m(Re.$$.fragment,l),m(_e.$$.fragment,l),m(be.$$.fragment,l),m(Ze.$$.fragment,l),m(Ne.$$.fragment,l),m(Il.$$.fragment,l),m(Se.$$.fragment,l),m(We.$$.fragment,l),m(qe.$$.fragment,l),m(cl.$$.fragment,l),m(ul.$$.fragment,l),m(Xe.$$.fragment,l),m(Le.$$.fragment,l),m(Ke.$$.fragment,l),m(et.$$.fragment,l),m(Mt.$$.fragment,l),m(pt.$$.fragment,l),m(ot.$$.fragment,l),m(wt.$$.fragment,l),m(ct.$$.fragment,l),m(mt.$$.fragment,l),m(ft.$$.fragment,l),m($t.$$.fragment,l),zM=!1},d(l){l&&(e(i),e(U),e(p),e(J),e(y),e(f),e(R),e(b),e(Z),e(B),e(S),e(_),e(D),e(G),e(L),e(X),e(ll),e(el),e(v),e($),e(sl),e(tl),e(nl),e(x),e(V),e(dl),e(Ul),e(Rt),e(fl),e(Bt),e(Vl),e(_t),e(Ql),e(gt),e(Al),e(bt),e(Et),e(hl),e(Zt),e(Nt),e(Rl),e(Ft),e(St),e(kt),e(_l),e(Ot),e(gl),e(Dt),e(bl),e(Wt),e(El),e(qt),e(Zl),e(vt),e(Gt),e(Fl),e(Xt),e(zt),e(Sl),e(Ht),e(xt),e(Yt),e(Ol),e(Lt),e(Dl),e(Pt),e(Wl),e(Kt),e(ls),e(es),e(Gl),e(ts),e(ss),e(zl),e(Ms),e(Hl),e(Us),e(ns),e(Yl),e(Js),e(as),e(Ts),e(ps),e(Pl),e(ys),e(is),e(os),e(le),e(js),e(Cs),e(ws),e(se),e(rs),e(Is),e(cs),e(ne),e(us),e(ms),e(ds),e(Te),e(fs),e(pe),e(Vs),e(ye),e(Qs),e(ie),e(As),e($s),e(hs),e(Ce),e(Rs),e(we),e(Bs),e(_s),e(gs),e(Ie),e(bs),e(ce),e(Es),e(Zs),e(me),e(Ns),e(Fs),e(Ss),e(fe),e(ks),e(Os),e(Qe),e(Ds),e(Ws),e(qs),e(he),e(vs),e(Gs),e(Be),e(Xs),e(zs),e(ge),e(Hs),e(xs),e(Ee),e(Ys),e(Ls),e(Ps),e(Fe),e(Ks),e(lM),e(eM),e(ke),e(tM),e(Oe),e(sM),e(De),e(MM),e(UM),e(nM),e(ve),e(JM),e(aM),e(Ge),e(TM),e(pM),e(yM),e(ze),e(iM),e(He),e(oM),e(xe),e(jM),e(Ye),e(CM),e(wM),e(Pe),e(rM),e(IM),e(lt),e(cM),e(uM),e(tt),e(mM),e(st),e(dM),e(fM),e(Ut),e(VM),e(nt),e(QM),e(Jt),e(AM),e(at),e($M),e(Tt),e(hM),e(RM),e(yt),e(BM),e(it),e(_M),e(gM),e(jt),e(bM),e(Ct),e(EM),e(ZM),e(rt),e(NM),e(It),e(FM),e(SM),e(ut),e(kM),e(OM),e(dt),e(DM),e(WM),e(Vt),e(qM),e(Qt),e(vM),e(At),e(GM),e(XM),e(ht)),e(s),d(n,l),d(h,l),d(H,l),d(K,l),d(q,l),d(F,l),d(N,l),d($l,l),d(Tl,l),d(pl,l),d(Bl,l),d(Nl,l),d(yl,l),d(il,l),d(kl,l),d(ql,l),d(vl,l),d(Xl,l),d(xl,l),d(ol,l),d(jl,l),d(Ll,l),d(Cl,l),d(Kl,l),d(ee,l),d(te,l),d(Me,l),d(Ue,l),d(Je,l),d(ae,l),d(oe,l),d(je,l),d(wl,l),d(re,l),d(ue,l),d(rl,l),d(de,l),d(Ve,l),d(Ae,l),d($e,l),d(Re,l),d(_e,l),d(be,l),d(Ze,l),d(Ne,l),d(Il,l),d(Se,l),d(We,l),d(qe,l),d(cl,l),d(ul,l),d(Xe,l),d(Le,l),d(Ke,l),d(et,l),d(Mt,l),d(pt,l),d(ot,l),d(wt,l),d(ct,l),d(mt,l),d(ft,l),d($t,l)}}}const mJ='{"title":"DeepSpeed","local":"deepspeed","sections":[{"title":"설치","local":"installation","sections":[],"depth":2},{"title":"메모리 요구량","local":"memory-requirements","sections":[],"depth":2},{"title":"ZeRO 단계 설정하기","local":"select-a-zero-stage","sections":[],"depth":2},{"title":"DeepSpeed 구성 파일","local":"deepspeed-configuration-file","sections":[{"title":"DeepSpeed와 Trainer 매개변수","local":"deepspeed-and-trainer-parameters","sections":[],"depth":3},{"title":"ZeRO 구성","local":"zero-configuration","sections":[],"depth":3},{"title":"NVMe 설정","local":"nvme-configuration","sections":[],"depth":3}],"depth":2},{"title":"DeepSpeed 구성","local":"deepspeed-features","sections":[{"title":"활성화/그레이디언트 체크포인팅","local":"activationgradient-checkpointing","sections":[],"depth":3},{"title":"옵티마이저와 스케줄러","local":"optimizer-and-scheduler","sections":[],"depth":3},{"title":"정밀도","local":"precision","sections":[],"depth":3},{"title":"배치 크기","local":"batch-size","sections":[],"depth":3},{"title":"그레이디언트 누적","local":"gradient-accumulation","sections":[],"depth":3},{"title":"그레이디언트 클리핑","local":"gradient-clipping","sections":[],"depth":3},{"title":"통신 데이터 유형(Communication data type)","local":"communication-data-type","sections":[],"depth":3}],"depth":2},{"title":"모델 배포","local":"deployment","sections":[{"title":"다중 노드 환경에서의 모델 배포","local":"multi-node-deployment","sections":[],"depth":3},{"title":"SLURM","local":"slurm","sections":[],"depth":3},{"title":"노트북","local":"notebook","sections":[],"depth":3}],"depth":2},{"title":"모델 가중치 저장하기","local":"save-model-weights","sections":[{"title":"온라인 환경","local":"online","sections":[],"depth":3},{"title":"오프라인 환경","local":"offline","sections":[],"depth":3}],"depth":2},{"title":"ZeRO Inference","local":"zero-inference","sections":[],"depth":2},{"title":"Trainer 없이 DeepSpeed 사용하기","local":"non-trainer-deepspeed-integration","sections":[{"title":"Trainer 없이 ZeRO Inference 사용하기","local":"non-trainer-zero-inference","sections":[],"depth":3},{"title":"생성","local":"generate","sections":[],"depth":3}],"depth":2},{"title":"트러블슈팅","local":"troubleshoot","sections":[{"title":"DeepSpeed 프로세스가 시작 단계에서 종료되었을 경우","local":"deepspeed-process-killed-at-startup","sections":[],"depth":3},{"title":"NaN 손실","local":"nan-loss","sections":[],"depth":3}],"depth":2},{"title":"리소스","local":"resources","sections":[],"depth":2}],"depth":1}';function dJ(A){return hn(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class RJ extends Rn{constructor(s){super(),Bn(this,s,dJ,uJ,$n,{})}}export{RJ as component}; | |
Xet Storage Details
- Size:
- 192 kB
- Xet hash:
- 237e2ab0ff4e0ec134cba1ff39c42157b1ca3df164a9fe0171198c46a0dbdfeb
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.