Buckets:

hf-doc-build/doc / transformers /main /ko /perf_infer_gpu_multi.html
HuggingFaceDocBuilder's picture
download
raw
57.8 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;분산 추론&quot;,&quot;local&quot;:&quot;distributed-inference&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;모델 분할&quot;,&quot;local&quot;:&quot;partitioning-a-model&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;분할 전략&quot;,&quot;local&quot;:&quot;partitioning-strategies&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;패킹된 전략&quot;,&quot;local&quot;:&quot;packed-strategies&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;로컬 전략&quot;,&quot;local&quot;:&quot;local-strategies&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;사용자 정의 분할 전략&quot;,&quot;local&quot;:&quot;custom-partitioning-strategies&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;벤치마크&quot;,&quot;local&quot;:&quot;benchmarks&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;설계 구현&quot;,&quot;local&quot;:&quot;design-implementation&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;DeviceMesh&quot;,&quot;local&quot;:&quot;devicemesh&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;DTensor&quot;,&quot;local&quot;:&quot;dtensor&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/main/ko/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/entry/start.3df1c19e.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/scheduler.53228c21.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/singletons.83674bae.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/index.e93d0901.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/paths.aee36068.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/entry/app.5226fb4b.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/preload-helper.cb103237.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/index.3db2ce32.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/nodes/0.46820ae2.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/nodes/132.62c5e052.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/CopyLLMTxtMenu.1327b590.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.49b88d99.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/CodeBlock.ada04ea6.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/HfOption.be649c8b.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;분산 추론&quot;,&quot;local&quot;:&quot;distributed-inference&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;모델 분할&quot;,&quot;local&quot;:&quot;partitioning-a-model&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;분할 전략&quot;,&quot;local&quot;:&quot;partitioning-strategies&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;패킹된 전략&quot;,&quot;local&quot;:&quot;packed-strategies&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;로컬 전략&quot;,&quot;local&quot;:&quot;local-strategies&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;사용자 정의 분할 전략&quot;,&quot;local&quot;:&quot;custom-partitioning-strategies&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;벤치마크&quot;,&quot;local&quot;:&quot;benchmarks&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;설계 구현&quot;,&quot;local&quot;:&quot;design-implementation&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;DeviceMesh&quot;,&quot;local&quot;:&quot;devicemesh&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;DTensor&quot;,&quot;local&quot;:&quot;dtensor&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner 
dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="distributed-inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#distributed-inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 
1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>분산 추론</span></h1> <p data-svelte-h="svelte-14d2zdi">모델이 단일 GPU에 올라가지 않는 경우, <a href="./perf_train_gpu_many#tensor-parallelism">텐서 병렬 처리</a>를 사용한 분산 추론이 도움이 될 수 있습니다. 텐서 병렬화는 모델을 여러 가속기(CUDA GPU, Intel XPU 등)에 분할하여 행렬 곱셈과 같은 계산을 병렬화합니다. 이를 통해 더 큰 모델을 메모리에 올릴 수 있으며, 각 가속기가 텐서의 일부를 처리하므로 추론 속도가 향상됩니다.</p> <p data-svelte-h="svelte-15tsczi">그러나 텐서 병렬화는 통신 오버헤드를 발생시키므로, 빠른 노드 내 통신을 활용할 수 있는 다중 가속기 환경에서 사용하는 것이 가장 효과적입니다. 다중 노드 학습 환경에서는 사용 사례에 따라 파이프라인 병렬화나 데이터 병렬화를 사용하는 것이 더 효율적일 수 있습니다.</p> <blockquote class="tip" data-svelte-h="svelte-19v7i6i"><p>텐서 병렬화에 대해 더 자세히 알아보려면 <a href="https://huggingface.co/spaces/nanotron/ultrascale-playbook?section=tensor_parallelism" rel="nofollow">Ultra-Scale Playbook</a>의 텐서 병렬화 섹션을 참조하세요.</p></blockquote> <p data-svelte-h="svelte-up20qz">아래 목록에서 텐서 병렬 처리를 기본적으로 지원하는 모델을 확인할 수 있습니다. 
새로운 모델에 대한 지원을 추가하려면 GitHub 이슈나 풀 리퀘스트를 열어주세요.</p> <details data-svelte-h="svelte-1u7eyth"><summary>지원되는 모델 보기</summary> <ul><li><a href="./model_doc/cohere">Cohere</a><a href="./model_doc/cohere2">Cohere 2</a></li> <li><a href="./model_doc/gemma">Gemma</a><a href="./model_doc/gemma2">Gemma 2</a></li> <li><a href="./model_doc/glm">GLM</a></li> <li><a href="./model_doc/granite">Granite</a></li> <li><a href="./model_doc/llama">Llama</a></li> <li><a href="./model_doc/mistral">Mistral</a></li> <li><a href="./model_doc/mixtral">Mixtral</a></li> <li><a href="./model_doc/olmo">OLMo</a><a href="./model_doc/olmo2">OLMo2</a></li> <li><a href="./model_doc/phi">Phi</a><a href="./model_doc/phi3">Phi-3</a></li> <li><a href="./model_doc/qwen2">Qwen2</a>, <a href="./model_doc/qwen2_moe">Qwen2Moe</a>, 및 <a href="./model_doc/qwen2_5_vl">Qwen2-VL</a></li> <li><a href="./model_doc/starcoder2">Starcoder2</a></li></ul></details> <p data-svelte-h="svelte-z9x1gj">이 가이드는 Transformers에서 다양한 분할 전략을 사용하여 텐서 병렬화를 활성화하는 방법을 설명합니다.</p> <h2 class="relative group"><a id="partitioning-a-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#partitioning-a-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 
0-79.196z" fill="currentColor"></path></svg></span></a> <span>모델 분할</span></h2> <p data-svelte-h="svelte-aoxhy1">Transformers는 <code>tp_plan</code>매개변수를 활용할 수 있는 모델에 대해 텐서 병렬 처리를 지원합니다. 모델 분할 방식은 두 가지가 있습니다.</p> <ul data-svelte-h="svelte-1utt5da"><li><code>auto</code> 텐서 병렬화 계획은 사전 정의된 구성을 기반으로 모델(위에 언급된 지원 모델)을 자동으로 분할합니다.</li> <li>사용자 지정 분할 계획을 직접 정의하여 [~PreTrainedModel.from_pretrained] 메소드의 <code>tp_plan</code> 매개변수로 전달할 수 있습니다.</li></ul> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">auto plan </div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">manual plan </div></div> <div class="language-select"><div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" 
style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> os
<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForCausalLM, AutoTokenizer
<span class="hljs-comment"># model_id = &quot;meta-llama/Llama-4-Scout-17B-16E-Instruct&quot; # 모든 가능한 전략을 시각화하기에 더 좋음</span>
model_id = <span class="hljs-string">&quot;meta-llama/Meta-Llama-3-8B-Instruct&quot;</span> <span class="hljs-comment"># 적은 수의 GPU에 더 좋음</span>
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, tp_plan=<span class="hljs-string">&quot;auto&quot;</span>)
<span class="hljs-built_in">print</span>(model._tp_plan)
tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;meta-llama/Meta-Llama-3-8B-Instruct&quot;</span>)
prompt = <span class="hljs-string">&quot;Can I help&quot;</span>
inputs = tokenizer(prompt, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>).input_ids.to(model.device)
<span class="hljs-comment"># 분산 실행</span>
outputs = model(inputs)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1s689lc">위의 추론 스크립트를 GPU당 4개 프로세스로 <a href="https://pytorch.org/docs/stable/elastic/run.html" rel="nofollow">torchrun</a>에서 실행하세요.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-bash "><!-- HTML_TAG_START -->torchrun --nproc-per-node 4 demo.py<!-- HTML_TAG_END --></pre></div> </div> <h2 class="relative group"><a id="partitioning-strategies" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#partitioning-strategies"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path 
d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>분할 전략</span></h2> <p data-svelte-h="svelte-j0a8zd">모든 분할 전략은 문자열을 전략 구현에 매핑하는 <code>ParallelInterface</code> 클래스에서 정의됩니다. 모든 전략은 <a href="/docs/transformers/main/ko/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a><code>tp_plan</code>을 통해 설정되므로 이 클래스와 직접 상호 작용할 필요는 없지만, 어떤 전략을 사용할 수 있는지 확인할 때 유용합니다.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; 
border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-keyword">class</span> <span class="hljs-title class_">ParallelInterface</span>(<span class="hljs-title class_ inherited__">MutableMapping</span>):
<span class="hljs-string">&quot;&quot;&quot;
허용된 텐서 병렬화 전략을 추적하는 딕셔너리 같은 객체입니다. `register()` 호출로 새로운 전략을 쉽게 추가할 수 있습니다.
모델이 기존 전략(예: `colwise`)을 로컬에서 덮어쓰려면 `modeling_&lt;model&gt;.py` 내부에서 이 클래스의 새 인스턴스를 선언하고
해당 인스턴스에 등록해야 합니다.
&quot;&quot;&quot;</span>
_global_mapping = {
<span class="hljs-string">&quot;colwise&quot;</span>: ColwiseParallel(),
<span class="hljs-string">&quot;rowwise&quot;</span>: RowwiseParallel(),
<span class="hljs-string">&quot;colwise_rep&quot;</span>: ColwiseParallel(output_layouts=Replicate()),
<span class="hljs-string">&quot;rowwise_rep&quot;</span>: RowwiseParallel(input_layouts=Replicate()),
<span class="hljs-string">&quot;local_colwise&quot;</span>: ColwiseParallel(use_dtensor=<span class="hljs-literal">False</span>),
<span class="hljs-string">&quot;local_rowwise&quot;</span>: RowwiseParallel(use_dtensor=<span class="hljs-literal">False</span>),
<span class="hljs-string">&quot;local&quot;</span>: IsolatedParallel(),
<span class="hljs-string">&quot;moe_tp_experts&quot;</span>: MoeTensorParalellExperts(),
<span class="hljs-string">&quot;local_packed_rowwise&quot;</span>: PackedRowwiseParallel(use_dtensor=<span class="hljs-literal">False</span>),
<span class="hljs-string">&quot;sequence_parallel&quot;</span>: SequenceParallel(),
<span class="hljs-string">&quot;replicate&quot;</span>: ReplicateParallel(),
}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-4p6v3s">각 전략에 대해 자세히 알아보려면 아래 표를 참고하세요.</p> <table data-svelte-h="svelte-1ykbwg3"><thead><tr><th>전략</th> <th>설명</th></tr></thead> <tbody><tr><td><code>ColwiseParallel</code></td> <td>가중치와 편향의 열 방향 분할.</td></tr> <tr><td><code>RowwiseParallel</code></td> <td>가중치와 편향의 행 방향 분할. <code>nn.Embedding</code> 모듈 분할도 지원.</td></tr> <tr><td><code>SequenceParallel</code></td> <td><code>LayerNorm</code><code>Dropout</code> 레이어를 지원하는 시퀀스 병렬 구현. <a href="https://github.com/facebookresearch/llama/blob/main/llama/model.py#L34" rel="nofollow">RMSNorm</a>의 Python 구현도 지원.</td></tr> <tr><td><code>PackedColwiseParallel</code></td> <td>패킹된 가중치를 지원하는 <code>ColwiseParallel</code>의 변형(예: <code>up_proj</code><code>gate_proj</code>를 함께 패킹). 자세한 내용은 <a href="https://github.com/huggingface/transformers/blob/main/src/transformers/integrations/tensor_parallel.py#L79-#L108" rel="nofollow">코드</a>를 참조하세요.</td></tr> <tr><td><code>PackedRowwiseParallel</code></td> <td>패킹된 가중치를 지원하는 <code>RowwiseParallel</code>의 변형(<a href="https://github.com/huggingface/transformers/blob/main/src/transformers/integrations/tensor_parallel.py#L79-#L108" rel="nofollow">코드</a> 참조).</td></tr> <tr><td><code>GatherParallel</code></td> <td>기기 간 모듈의 출력을 수집.</td></tr> <tr><td><code>IsolatedParallel</code></td> <td>Mixture-of-Experts(MoE) 레이어의 전문가에 사용되어 다른 기기로부터 모듈을 격리.</td></tr> <tr><td><code>ReplicateParallel</code></td> <td>부분적으로 분할된 모델로 인해 <code>torch.distributed</code> API가 중단되는 것을 방지하기 위해 모든 기기에 모듈을 복제.</td></tr></tbody></table> <h3 class="relative group"><a id="packed-strategies" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#packed-strategies"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" 
viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>패킹된 전략</span></h3> <p data-svelte-h="svelte-mlf0yd">가중치 패킹은 여러 선형 레이어를 하나의 더 큰 레이어로 합치는 기법입니다. 패킹된 전략인 <code>PackedColwiseParallel</code><code>PackedRowwiseParallel</code>은 패킹된 가중치를 분할하는 데 사용됩니다. 기본적인 <code>ColwiseParallel</code>이나 <code>RowwiseParallel</code>은 패킹된 가중치를 올바르게 분할하지 못합니다.</p> <p data-svelte-h="svelte-1rsliip">아래 예시는 <code>up_proj</code><code>gate_proj</code>를 단일 <code>gate_up_proj</code> 모듈로 패킹하고 <code>gate_up_proj</code>를 분할하기 위해 <code>PackedRowwiseParallel</code> 전략이 필요합니다.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform 
-translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">class</span> <span class="hljs-title class_">Llama4TextExperts</span>(nn.Module):
...
self.gate_up_proj = nn.Parameter(torch.zeros(self.num_experts, self.hidden_size, <span class="hljs-number">2</span> * self.expert_dim))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ac3lcr">배치 행렬 곱셈을 <code>forward</code> 패스에서 사용하여 <code>gate_up_proj</code> 모듈의 출력을 계산할 수 있습니다.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">forward</span>(<span class="hljs-params">self, hidden_states</span>):
...
gate_up = torch.bmm(hidden_states, self.gate_up_proj) <span class="hljs-comment"># gate_up_proj 모듈의 출력 계산</span>
gate, up = gate_up.chunk(<span class="hljs-number">2</span>, dim=-<span class="hljs-number">1</span>) <span class="hljs-comment"># 출력을 gate와 up으로 분할</span><!-- HTML_TAG_END --></pre></div> <blockquote class="tip" data-svelte-h="svelte-178pzzr"><p><code>Packed*</code>를 사용해야 하는 이유에 대한 시각적 표현은 <a href="https://github.com/huggingface/transformers/blob/main/src/transformers/integrations/tensor_parallel.py#L79-#L108" rel="nofollow">이 주석</a>을 참고하세요.</p></blockquote> <h3 class="relative group"><a id="local-strategies" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#local-strategies"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>로컬 전략</span></h3> <p data-svelte-h="svelte-jve0cp">로컬 전략(<code>local_colwise</code>, <code>local_rowwise</code>, <code>local_packed_rowwise</code>)은 <a href="https://docs.pytorch.org/docs/stable/generated/torch.chunk.html" rel="nofollow">torch.chunk</a>와 같은 일부 연산에서 지원되지 않기 때문에 <a href="https://docs.pytorch.org/docs/stable/distributed.tensor.html" rel="nofollow">DTensor</a>를 사용하지 않습니다. 
대신 로컬 전략은 기본 <a href="https://docs.pytorch.org/docs/stable/tensors.html" rel="nofollow">torch.Tensor</a>를 사용하고 일부 분산 로직을 수동으로 수행합니다.</p> <h2 class="relative group"><a id="custom-partitioning-strategies" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#custom-partitioning-strategies"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>사용자 정의 분할 전략</span></h2> <p data-svelte-h="svelte-1dfxcdp">사용자 정의 분할 전략은 <a href="https://github.com/huggingface/transformers/blob/main/src/transformers/integrations/tensor_parallel.py" rel="nofollow"><code>TensorParallelLayer</code></a>를 상속하고 <code>partition_tensor</code>, <code>_prepare_input_fn</code>, <code>_prepare_output_fn</code>을 구현해야 합니다.</p> <p data-svelte-h="svelte-5mfgws">그런 다음 <code>tp_plan</code>에서 해당 전략을 지정했을 때 디스패칭 로직이 찾을 수 있도록 <code>ParallelInterface</code> 매핑에 등록해야 합니다.</p> <p data-svelte-h="svelte-1leetm1">아래 예시는 이 워크플로우로 <code>ColwiseParallel</code>을 구현하는 방법을 보여줍니다.</p> <ol><li><p data-svelte-h="svelte-8l7aga"><code>TensorParallelLayer</code>를 상속합니다. <code>__init__</code> 메소드에서 입력 및 출력 텐서가 기기에 어떻게 배치되어야 하는지 설명하는 <code>input_layouts</code><code>output_layouts</code>을 정의합니다. 
<code>desired_input_layouts</code> 속성은 입력이 기기에 어떻게 배치<em>되어야만</em> 하는지를 명시하는 데 사용됩니다.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">class</span> <span class="hljs-title class_">ColwiseParallel</span>(<span class="hljs-title class_ inherited__">TensorParallelLayer</span>):
<span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">
self,
*,
input_layouts: <span class="hljs-type">Optional</span>[Placement] = <span class="hljs-literal">None</span>, <span class="hljs-comment"># 이전 레이어에서 오는 입력 레이아웃</span>
output_layouts: <span class="hljs-type">Optional</span>[Placement] = <span class="hljs-literal">None</span>, <span class="hljs-comment"># 달성하고자 하는 출력 레이아웃</span>
use_local_output: <span class="hljs-built_in">bool</span> = <span class="hljs-literal">True</span>, <span class="hljs-comment"># 로컬 출력 사용 여부</span>
use_dtensor=<span class="hljs-literal">True</span>, <span class="hljs-comment"># DTensor 사용 여부</span>
</span>):
self.input_layouts = (input_layouts <span class="hljs-keyword">or</span> Replicate(),) <span class="hljs-comment"># 이전 레이어에서 오는 입력 분할</span>
self.output_layouts = (output_layouts <span class="hljs-keyword">or</span> Shard(-<span class="hljs-number">1</span>),) <span class="hljs-comment"># 원하는 출력 분할</span>
self.desired_input_layouts = (Replicate(),) <span class="hljs-comment"># 원하는 입력 분할, 입력은 GPU 간에 복제되어야 함</span>
self.use_local_output = use_local_output
self.use_dtensor = use_dtensor<!-- HTML_TAG_END --></pre></div></li> <li><p data-svelte-h="svelte-1resqlr"><code>partition_tensor</code>, <code>_prepare_input_fn</code>, <code>_prepare_output_fn</code> 메서드를 구현합니다.</p> <p data-svelte-h="svelte-rrnzhi"><code>partition_tensor</code> 메소드는 텐서를 분할하고 분할된 텐서로 <code>empty_param</code>을 채웁니다. 유틸리티 함수 <code>get_tensor_shard</code>를 사용하여 주어진 랭크에 대한 원본 매개변수의 올바른 분할을 얻고, 패킹된 가중치에 대해서는 <code>get_packed_weights</code>를 사용하세요.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">partition_tensor</span>(<span class="hljs-params">
self,
param, <span class="hljs-comment"># 매개변수의 전체 텐서</span>
empty_param, <span class="hljs-comment"># 매개변수의 빈 텐서, 분할된 텐서로 채워짐</span>
param_type, <span class="hljs-comment"># 매개변수 유형, `bias` 또는 `weight`</span>
param_casting_dtype, <span class="hljs-comment"># 매개변수를 캐스팅할 유형</span>
to_contiguous, <span class="hljs-comment"># 텐서를 연속적인 메모리 레이아웃으로 변환할지 여부</span>
rank, <span class="hljs-comment"># 현재 기기의 랭크</span>
device_mesh, <span class="hljs-comment"># 기기 메시</span>
</span>) -&gt; nn.Parameter: <span class="hljs-comment"># 분할된 매개변수 반환</span>
...<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-h44hq"><code>_prepare_input_fn</code><code>_prepare_output_fn</code> 메소드는 <a href="https://docs.pytorch.org/docs/stable/generated/torch.nn.modules.module.register_module_forward_pre_hook.html" rel="nofollow">사전 포워드</a><a href="https://docs.pytorch.org/docs/stable/generated/torch.nn.modules.module.register_module_forward_hook.html" rel="nofollow">포워드</a> 훅에서 사용됩니다. <code>__init__</code>에서 지정된 대로 입력과 출력을 원하는 레이아웃으로 재분배합니다.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">_prepare_input_fn</span>(<span class="hljs-params">input_layouts, desired_input_layouts, mod, inputs, device_mesh</span>):
...
<span class="hljs-comment"># 사용자 정의 로직 수행, DTensor로 캐스팅 등.</span>
...
<span class="hljs-keyword">return</span> inputs.redistribute(placements=desired_input_layouts, device_mesh=device_mesh)
<span class="hljs-keyword">def</span> <span class="hljs-title function_">_prepare_output_fn</span>(<span class="hljs-params">output_layouts, use_local_output, mod, outputs, device_mesh</span>):
...
<span class="hljs-comment"># 사용자 정의 로직 수행, DTensor로 캐스팅 등.</span>
...
<span class="hljs-keyword">return</span> outputs.redistribute(placements=output_layouts, device_mesh=device_mesh)<!-- HTML_TAG_END --></pre></div></li> <li><p data-svelte-h="svelte-7l2lnu"><code>tp_plan</code>과 함께 사용할 수 있도록 전략을 <code>ParallelInterface</code>에 등록합니다.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers.integrations.tensor_parallel <span class="hljs-keyword">import</span> ParallelInterface
ParallelInterface.register_strategy(<span class="hljs-string">&quot;colwise_custom&quot;</span>, ColwiseParallel)
tp_plan = {
<span class="hljs-string">&quot;model.layers.*.self_attn.q_proj&quot;</span>: <span class="hljs-string">&quot;colwise_custom&quot;</span>,
...
}
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, tp_plan=tp_plan)<!-- HTML_TAG_END --></pre></div></li></ol> <h2 class="relative group"><a id="benchmarks" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#benchmarks"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>벤치마크</span></h2> <p data-svelte-h="svelte-14zza52">텐서 병렬화는 특히 큰 배치 크기나 긴 시퀀스를 가진 입력에 대한 추론 속도를 크게 향상시킬 수 있습니다.</p> <p data-svelte-h="svelte-1yihgi7">시퀀스 길이가 512인 <a href="./model_doc/llama">Llama</a>에서 단일 포워드 패스에 대한 예상 속도 향상 수치는 아래 차트를 참조하세요.</p> <div style="text-align: center" data-svelte-h="svelte-1tp7cj2"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/Meta-Llama-3-8B-Instruct%2C%20seqlen%20%3D%20512%2C%20python%2C%20w_%20compile.png"></div> <h2 class="relative group"><a id="design-implementation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#design-implementation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>설계 구현</span></h2> <p data-svelte-h="svelte-75uovw">Transformers 텐서 병렬화 구현은 프레임워크에 구애받지 않지만, 구체적인 구현을 위해서는 <a href="https://docs.pytorch.org/tutorials/recipes/distributed_device_mesh.html" rel="nofollow">DeviceMesh</a><a href="https://docs.pytorch.org/tutorials/beginner/dist_overview.html" rel="nofollow">torch.distributed</a><a href="https://docs.pytorch.org/docs/stable/distributed.tensor.html" rel="nofollow">DTensor</a>에 의존하여 간단하고 확장 가능한 인터페이스를 제공합니다.</p> <h3 class="relative group"><a id="devicemesh" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#devicemesh"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 
0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>DeviceMesh</span></h3> <p data-svelte-h="svelte-1y2fgo0"><code>DeviceMesh</code>를 함께 통신하는 기기들의 다차원 그리드로 상상해보세요. 병렬 처리 전략마다 각기 다른 통신 패턴이 필요하므로, 여러 하위 메시를 가진 <code>DeviceMesh</code>를 만들 수 있습니다.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> torch.distributed.device_mesh <span class="hljs-keyword">import</span> init_device_mesh
<span class="hljs-comment"># 4개 GPU의 1D 메시 생성</span>
device_mesh = init_device_mesh(<span class="hljs-string">&quot;cuda&quot;</span>, (<span class="hljs-number">4</span>,), mesh_dim_names=[<span class="hljs-string">&quot;tp&quot;</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ow6amu"><code>torch.distributed</code>에서 정의된 대부분의 병렬화 전략은 메시 자체나 하위 메시에 적용할 수 있으며, 자동으로 통신 패턴을 처리합니다.</p> <h3 class="relative group"><a id="dtensor" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dtensor"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>DTensor</span></h3> <p data-svelte-h="svelte-1xnwx0n"><code>DTensor</code>(분산 텐서)는 일반적인 텐서 연산 위에 분산 로직을 처리하는 텐서 하위 클래스입니다. 텐서 병렬화의 대부분의 모델 가중치는 <code>DTensor</code> 형태로 저장됩니다.</p> <p data-svelte-h="svelte-1vtbhla">DTensor의 가장 중요한 부분은 <code>placement</code> 속성입니다. 이는 PyTorch에게 텐서가 <code>DeviceMesh</code>의 기기에 어떻게 배치되는지 알려주기 때문입니다. <code>placement</code> 속성은 다음 값을 가질 수 있습니다.</p> <ul><li><p data-svelte-h="svelte-16ifi17"><code>Shard(dimension)</code> - <code>DTensor</code>가 구성된 <code>DeviceMesh</code>에서 주어진 차원에 걸쳐 어떻게 분할되는지 나타냅니다. 
아래 예시는 열 방향 분할을 위해 다양한 차원에 걸쳐 가중치를 분할하는 방법을 보여줍니다.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START -->weight = ...
weight = DTensor.from_local(weight, device_mesh[<span class="hljs-string">&quot;tp&quot;</span>], placements=[Shard(<span class="hljs-number">0</span>)]) <span class="hljs-comment"># 첫 번째(열 방향) 차원에 걸쳐 분할</span>
bias = ...
bias = DTensor.from_local(bias, device_mesh[<span class="hljs-string">&quot;tp&quot;</span>], placements=[Shard(-<span class="hljs-number">1</span>)]) <span class="hljs-comment"># 유일한 차원에 걸쳐 분할</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1pehibv">이 예시는 행 방향 분할을 위해 여러 차원에 걸쳐 가중치를 분할하는 방법을 보여줍니다.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START -->weight = ...
weight = DTensor.from_local(weight, device_mesh[<span class="hljs-string">&quot;tp&quot;</span>], placements=[Shard(<span class="hljs-number">1</span>)]) <span class="hljs-comment"># 두 번째(행 방향) 차원에 걸쳐 분할</span>
bias = ...
bias = DTensor.from_local(bias, device_mesh[<span class="hljs-string">&quot;tp&quot;</span>], placements=[Replicate()]) <span class="hljs-comment"># 모든 GPU에 편향 복제</span><!-- HTML_TAG_END --></pre></div></li> <li><p data-svelte-h="svelte-6uwdve"><code>Replicate()</code> - <code>DTensor</code><code>DeviceMesh</code>에 걸쳐 복제됨을 나타냅니다. 각 기기에 텐서의 전체 사본만 생성합니다.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START -->bias = ...
bias = DTensor.from_local(bias, device_mesh[<span class="hljs-string">&quot;tp&quot;</span>], placements=[Replicate()]) <span class="hljs-comment"># 모든 GPU에 편향 복제</span><!-- HTML_TAG_END --></pre></div></li> <li data-svelte-h="svelte-jjqmg1"><p><code>Partial()</code> - 텐서가 감소 연산을 기다리고 있는 상태임을 나타냅니다 (일반적으로 Transformers에서의 사용 사례와는 직접적인 관련이 적습니다).</p></li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ko/perf_infer_gpu_multi.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_1vzb9oe = {
assets: "/docs/transformers/main/ko",
base: "/docs/transformers/main/ko",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/transformers/main/ko/_app/immutable/entry/start.3df1c19e.js"),
import("/docs/transformers/main/ko/_app/immutable/entry/app.5226fb4b.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 132],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
57.8 kB
·
Xet hash:
e43a4452370e9b5eb0c50f5aeb62f4aa1f473abe4cce3e3f6ce13013f2961654

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.