Buckets:
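| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;토큰 분류&quot;,&quot;local&quot;:&quot;token-classification&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;WNUT 17 데이터 세트 가져오기&quot;,&quot;local&quot;:&quot;load-wnut-17-dataset&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;전처리&quot;,&quot;local&quot;:&quot;preprocess&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;평가&quot;,&quot;local&quot;:&quot;evaluation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;훈련&quot;,&quot;local&quot;:&quot;train&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;추론&quot;,&quot;local&quot;:&quot;inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"> | |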
| <link href="/docs/transformers/main/ko/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/entry/start.de34ae0b.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/scheduler.53228c21.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/singletons.1807e408.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/index.e93d0901.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/paths.864e2388.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/entry/app.54a1afb8.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/preload-helper.cb103237.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/index.3db2ce32.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/nodes/0.45408cb9.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/nodes/175.f300b48a.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/Tip.d62b0755.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/CopyLLMTxtMenu.1327b590.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.49b88d99.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/Youtube.b27f3a4a.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/CodeBlock.ada04ea6.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/DocNotebookDropdown.1834da50.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;토큰 분류&quot;,&quot;local&quot;:&quot;token-classification&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;WNUT 17 데이터 세트 가져오기&quot;,&quot;local&quot;:&quot;load-wnut-17-dataset&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;전처리&quot;,&quot;local&quot;:&quot;preprocess&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;평가&quot;,&quot;local&quot;:&quot;evaluation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;훈련&quot;,&quot;local&quot;:&quot;train&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;추론&quot;,&quot;local&quot;:&quot;inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white 
rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <div class="flex space-x-1 " style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <h1 class="relative group"><a id="token-classification" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#token-classification"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 
0-79.196z" fill="currentColor"></path></svg></span></a> <span>토큰 분류</span></h1> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/wVHdVlPScxA" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-h22iel">토큰 분류는 문장의 개별 토큰에 레이블을 할당합니다. 가장 일반적인 토큰 분류 작업 중 하나는 개체명 인식(Named Entity Recognition, NER)입니다. 개체명 인식은 문장에서 사람, 위치 또는 조직과 같은 각 개체의 레이블을 찾으려고 시도합니다.</p> <p data-svelte-h="svelte-14hiaa1">이 가이드에서 학습할 내용은:</p> <ol data-svelte-h="svelte-kskchb"><li><a href="https://huggingface.co/datasets/wnut_17" rel="nofollow">WNUT 17</a> 데이터 세트에서 <a href="https://huggingface.co/distilbert/distilbert-base-uncased" rel="nofollow">DistilBERT</a>를 파인 튜닝하여 새로운 개체를 탐지합니다.</li> <li>추론을 위해 파인 튜닝 모델을 사용합니다.</li></ol> <blockquote class="tip"><p data-svelte-h="svelte-1xdu3h">이 작업과 호환되는 모든 아키텍처와 체크포인트를 보려면 <a href="https://huggingface.co/tasks/token-classification" rel="nofollow">작업 페이지</a>를 확인하는 것이 좋습니다.</p></blockquote> <p data-svelte-h="svelte-1bc8bfk">시작하기 전에, 필요한 모든 라이브러리가 설치되어 있는지 확인하세요:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 
leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-bash "><!-- HTML_TAG_START -->pip install transformers datasets evaluate seqeval<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1hhyu5y">Hugging Face 계정에 로그인하여 모델을 업로드하고 커뮤니티에 공유하는 것을 권장합니다. 메시지가 표시되면, 토큰을 입력하여 로그인하세요:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login | |
| <span class="hljs-meta">>>> </span>notebook_login()<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="load-wnut-17-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#load-wnut-17-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>WNUT 17 데이터 세트 가져오기</span></h2> <p data-svelte-h="svelte-1v008tf">먼저 🤗 Datasets 라이브러리에서 WNUT 17 데이터 세트를 가져옵니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div 
class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>wnut = load_dataset(<span class="hljs-string">"wnut_17"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-7y00sm">다음 예제를 살펴보세요:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>wnut[<span class="hljs-string">"train"</span>][<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'id'</span>: <span class="hljs-string">'0'</span>, | |
| <span class="hljs-string">'ner_tags'</span>: [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">7</span>, <span class="hljs-number">8</span>, <span class="hljs-number">8</span>, <span class="hljs-number">0</span>, <span class="hljs-number">7</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| <span class="hljs-string">'tokens'</span>: [<span class="hljs-string">'@paulwalk'</span>, <span class="hljs-string">'It'</span>, <span class="hljs-string">"'s"</span>, <span class="hljs-string">'the'</span>, <span class="hljs-string">'view'</span>, <span class="hljs-string">'from'</span>, <span class="hljs-string">'where'</span>, <span class="hljs-string">'I'</span>, <span class="hljs-string">"'m"</span>, <span class="hljs-string">'living'</span>, <span class="hljs-string">'for'</span>, <span class="hljs-string">'two'</span>, <span class="hljs-string">'weeks'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'Empire'</span>, <span class="hljs-string">'State'</span>, <span class="hljs-string">'Building'</span>, <span class="hljs-string">'='</span>, <span class="hljs-string">'ESB'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'Pretty'</span>, <span class="hljs-string">'bad'</span>, <span class="hljs-string">'storm'</span>, <span class="hljs-string">'here'</span>, <span class="hljs-string">'last'</span>, <span class="hljs-string">'evening'</span>, <span class="hljs-string">'.'</span>] | |
| }<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19kj2kd"><code>ner_tags</code>의 각 숫자는 개체를 나타냅니다. 숫자를 레이블 이름으로 변환하여 개체가 무엇인지 확인합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>label_list = wnut[<span class="hljs-string">"train"</span>].features[<span class="hljs-string">f"ner_tags"</span>].feature.names | |
| <span class="hljs-meta">>>> </span>label_list | |
| [ | |
| <span class="hljs-string">"O"</span>, | |
| <span class="hljs-string">"B-corporation"</span>, | |
| <span class="hljs-string">"I-corporation"</span>, | |
| <span class="hljs-string">"B-creative-work"</span>, | |
| <span class="hljs-string">"I-creative-work"</span>, | |
| <span class="hljs-string">"B-group"</span>, | |
| <span class="hljs-string">"I-group"</span>, | |
| <span class="hljs-string">"B-location"</span>, | |
| <span class="hljs-string">"I-location"</span>, | |
| <span class="hljs-string">"B-person"</span>, | |
| <span class="hljs-string">"I-person"</span>, | |
| <span class="hljs-string">"B-product"</span>, | |
| <span class="hljs-string">"I-product"</span>, | |
| ]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1jetaks">각 <code>ner_tag</code>의 앞에 붙은 문자는 개체의 토큰 위치를 나타냅니다:</p> <ul data-svelte-h="svelte-wzon1c"><li><code>B-</code>는 개체의 시작을 나타냅니다.</li> <li><code>I-</code>는 토큰이 동일한 개체 내부에 포함되어 있음을 나타냅니다(예를 들어 <code>State</code> 토큰은 <code>Empire State Building</code>과 같은 개체의 일부입니다).</li> <li><code>O</code>는 토큰이 어떤 개체에도 해당하지 않음을 나타냅니다.</li></ul> <h2 class="relative group"><a id="preprocess" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#preprocess"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>전처리</span></h2> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/iY2AZYdZAr0" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-xprzxq">다음으로 <code>tokens</code> 필드를 전처리하기 위해 DistilBERT 토크나이저를 가져옵니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 
ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"distilbert/distilbert-base-uncased"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-b47600">위의 예제 <code>tokens</code> 필드를 보면 입력이 이미 토큰화된 것처럼 보입니다. 그러나 실제로 입력은 아직 토큰화되지 않았으므로 단어를 하위 단어로 토큰화하기 위해 <code>is_split_into_words=True</code>를 설정해야 합니다. 예제로 확인합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>example = wnut[<span class="hljs-string">"train"</span>][<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>tokenized_input = tokenizer(example[<span class="hljs-string">"tokens"</span>], is_split_into_words=<span class="hljs-literal">True</span>) | |
| <span class="hljs-meta">>>> </span>tokens = tokenizer.convert_ids_to_tokens(tokenized_input[<span class="hljs-string">"input_ids"</span>]) | |
| <span class="hljs-meta">>>> </span>tokens | |
| [<span class="hljs-string">'[CLS]'</span>, <span class="hljs-string">'@'</span>, <span class="hljs-string">'paul'</span>, <span class="hljs-string">'##walk'</span>, <span class="hljs-string">'it'</span>, <span class="hljs-string">"'"</span>, <span class="hljs-string">'s'</span>, <span class="hljs-string">'the'</span>, <span class="hljs-string">'view'</span>, <span class="hljs-string">'from'</span>, <span class="hljs-string">'where'</span>, <span class="hljs-string">'i'</span>, <span class="hljs-string">"'"</span>, <span class="hljs-string">'m'</span>, <span class="hljs-string">'living'</span>, <span class="hljs-string">'for'</span>, <span class="hljs-string">'two'</span>, <span class="hljs-string">'weeks'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'empire'</span>, <span class="hljs-string">'state'</span>, <span class="hljs-string">'building'</span>, <span class="hljs-string">'='</span>, <span class="hljs-string">'es'</span>, <span class="hljs-string">'##b'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'pretty'</span>, <span class="hljs-string">'bad'</span>, <span class="hljs-string">'storm'</span>, <span class="hljs-string">'here'</span>, <span class="hljs-string">'last'</span>, <span class="hljs-string">'evening'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'[SEP]'</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1slve6o">그러나 이로 인해 <code>[CLS]</code>과 <code>[SEP]</code>라는 특수 토큰이 추가되고, 하위 단어 토큰화로 인해 입력과 레이블 간에 불일치가 발생합니다. 하나의 레이블에 해당하는 단일 단어는 이제 두 개의 하위 단어로 분할될 수 있습니다. 토큰과 레이블을 다음과 같이 재정렬해야 합니다:</p> <ol data-svelte-h="svelte-1ynfql3"><li><a href="https://huggingface.co/docs/transformers/main_classes/tokenizer#transformers.BatchEncoding.word_ids" rel="nofollow"><code>word_ids</code></a> 메소드로 모든 토큰을 해당 단어에 매핑합니다.</li> <li>특수 토큰 <code>[CLS]</code>와 <code>[SEP]</code>에 <code>-100</code> 레이블을 할당하여, PyTorch 손실 함수가 해당 토큰을 무시하도록 합니다.</li> <li>주어진 단어의 첫 번째 토큰에만 레이블을 지정합니다. 
같은 단어의 다른 하위 토큰에 <code>-100</code>을 할당합니다.</li></ol> <p data-svelte-h="svelte-1uatru5">다음은 토큰과 레이블을 재정렬하고 DistilBERT의 최대 입력 길이보다 길지 않도록 시퀀스를 잘라내는 함수를 만드는 방법입니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">tokenize_and_align_labels</span>(<span class="hljs-params">examples</span>): | |
| <span class="hljs-meta">... </span> tokenized_inputs = tokenizer(examples[<span class="hljs-string">"tokens"</span>], truncation=<span class="hljs-literal">True</span>, is_split_into_words=<span class="hljs-literal">True</span>) | |
| <span class="hljs-meta">... </span> labels = [] | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> i, label <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(examples[<span class="hljs-string">f"ner_tags"</span>]): | |
| <span class="hljs-meta">... </span> word_ids = tokenized_inputs.word_ids(batch_index=i) <span class="hljs-comment"># Map tokens to their respective word.</span> | |
| <span class="hljs-meta">... </span> previous_word_idx = <span class="hljs-literal">None</span> | |
| <span class="hljs-meta">... </span> label_ids = [] | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> word_idx <span class="hljs-keyword">in</span> word_ids: <span class="hljs-comment"># Set the special tokens to -100.</span> | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> word_idx <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>: | |
| <span class="hljs-meta">... </span> label_ids.append(-<span class="hljs-number">100</span>) | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">elif</span> word_idx != previous_word_idx: <span class="hljs-comment"># Only label the first token of a given word.</span> | |
| <span class="hljs-meta">... </span> label_ids.append(label[word_idx]) | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">else</span>: | |
| <span class="hljs-meta">... </span> label_ids.append(-<span class="hljs-number">100</span>) | |
| <span class="hljs-meta">... </span> previous_word_idx = word_idx | |
| <span class="hljs-meta">... </span> labels.append(label_ids) | |
| <span class="hljs-meta">... </span> tokenized_inputs[<span class="hljs-string">"labels"</span>] = labels | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> tokenized_inputs<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-o2pm0z">전체 데이터 세트에 전처리 함수를 적용하려면, 🤗 Datasets <code>map</code> 함수를 사용하세요. <code>batched=True</code>로 설정하여 데이터 세트의 여러 요소를 한 번에 처리하면 <code>map</code> 함수의 속도를 높일 수 있습니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>tokenized_wnut = wnut.<span class="hljs-built_in">map</span>(tokenize_and_align_labels, batched=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-18q27eh">이제 <a href="/docs/transformers/main/ko/main_classes/data_collator#transformers.DataCollatorForTokenClassification">DataCollatorForTokenClassification</a>을 사용하여 예제 배치를 만들어봅시다. 
데이터 세트 전체를 최대 길이로 패딩하는 대신, <em>동적 패딩</em>을 사용하여 배치에서 가장 긴 길이에 맞게 문장을 패딩하는 것이 효율적입니다.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DataCollatorForTokenClassification | |
| <span class="hljs-meta">>>> </span>data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="evaluation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#evaluation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>평가</span></h2> <p data-svelte-h="svelte-1xei3jh">훈련 중 모델의 성능을 평가하기 위해 평가 지표를 포함하는 것이 유용합니다. 🤗 <a href="https://huggingface.co/docs/evaluate/index" rel="nofollow">Evaluate</a> 라이브러리를 사용하여 빠르게 평가 방법을 가져올 수 있습니다. 이 작업에서는 <a href="https://huggingface.co/spaces/evaluate-metric/seqeval" rel="nofollow">seqeval</a> 평가 지표를 가져옵니다. (평가 지표를 가져오고 계산하는 방법에 대해서는 🤗 Evaluate <a href="https://huggingface.co/docs/evaluate/a_quick_tour" rel="nofollow">빠른 둘러보기</a>를 참조하세요). 
Seqeval은 실제로 정밀도, 재현율, F1 및 정확도와 같은 여러 점수를 산출합니다.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> evaluate | |
| <span class="hljs-meta">>>> </span>seqeval = evaluate.load(<span class="hljs-string">"seqeval"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-no1q5">먼저 NER 레이블을 가져온 다음, <code>compute</code>에 실제 예측과 실제 레이블을 전달하여 점수를 계산하는 함수를 만듭니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-meta">>>> </span>labels = [label_list[i] <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> example[<span class="hljs-string">f"ner_tags"</span>]] | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">compute_metrics</span>(<span class="hljs-params">p</span>): | |
| <span class="hljs-meta">... </span> predictions, labels = p | |
| <span class="hljs-meta">... </span> predictions = np.argmax(predictions, axis=<span class="hljs-number">2</span>) | |
| <span class="hljs-meta">... </span> true_predictions = [ | |
| <span class="hljs-meta">... </span> [label_list[p] <span class="hljs-keyword">for</span> (p, l) <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(prediction, label) <span class="hljs-keyword">if</span> l != -<span class="hljs-number">100</span>] | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> prediction, label <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(predictions, labels) | |
| <span class="hljs-meta">... </span> ] | |
| <span class="hljs-meta">... </span> true_labels = [ | |
| <span class="hljs-meta">... </span> [label_list[l] <span class="hljs-keyword">for</span> (p, l) <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(prediction, label) <span class="hljs-keyword">if</span> l != -<span class="hljs-number">100</span>] | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> prediction, label <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(predictions, labels) | |
| <span class="hljs-meta">... </span> ] | |
| <span class="hljs-meta">... </span> results = seqeval.compute(predictions=true_predictions, references=true_labels) | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> { | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"precision"</span>: results[<span class="hljs-string">"overall_precision"</span>], | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"recall"</span>: results[<span class="hljs-string">"overall_recall"</span>], | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"f1"</span>: results[<span class="hljs-string">"overall_f1"</span>], | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"accuracy"</span>: results[<span class="hljs-string">"overall_accuracy"</span>], | |
| <span class="hljs-meta">... </span> }<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1tfmmd0">이제 <code>compute_metrics</code> 함수를 사용할 준비가 되었으며, 훈련을 설정하면 이 함수로 되돌아올 것입니다.</p> <h2 class="relative group"><a id="train" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#train"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>훈련</span></h2> <p data-svelte-h="svelte-84b3vk">모델을 훈련하기 전에, <code>id2label</code>와 <code>label2id</code>를 사용하여 예상되는 id와 레이블의 맵을 생성하세요:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" 
transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>id2label = { | |
| <span class="hljs-meta">... </span> <span class="hljs-number">0</span>: <span class="hljs-string">"O"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">1</span>: <span class="hljs-string">"B-corporation"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">2</span>: <span class="hljs-string">"I-corporation"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">3</span>: <span class="hljs-string">"B-creative-work"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">4</span>: <span class="hljs-string">"I-creative-work"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">5</span>: <span class="hljs-string">"B-group"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">6</span>: <span class="hljs-string">"I-group"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">7</span>: <span class="hljs-string">"B-location"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">8</span>: <span class="hljs-string">"I-location"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">9</span>: <span class="hljs-string">"B-person"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">10</span>: <span class="hljs-string">"I-person"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">11</span>: <span class="hljs-string">"B-product"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">12</span>: <span class="hljs-string">"I-product"</span>, | |
| <span class="hljs-meta">... </span>} | |
| <span class="hljs-meta">>>> </span>label2id = { | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"O"</span>: <span class="hljs-number">0</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"B-corporation"</span>: <span class="hljs-number">1</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"I-corporation"</span>: <span class="hljs-number">2</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"B-creative-work"</span>: <span class="hljs-number">3</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"I-creative-work"</span>: <span class="hljs-number">4</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"B-group"</span>: <span class="hljs-number">5</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"I-group"</span>: <span class="hljs-number">6</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"B-location"</span>: <span class="hljs-number">7</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"I-location"</span>: <span class="hljs-number">8</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"B-person"</span>: <span class="hljs-number">9</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"I-person"</span>: <span class="hljs-number">10</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"B-product"</span>: <span class="hljs-number">11</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"I-product"</span>: <span class="hljs-number">12</span>, | |
| <span class="hljs-meta">... </span>}<!-- HTML_TAG_END --></pre></div> <blockquote class="tip"><p data-svelte-h="svelte-kjzaqs"><a href="/docs/transformers/main/ko/main_classes/trainer#transformers.Trainer">Trainer</a>를 사용하여 모델을 파인 튜닝하는 방법에 익숙하지 않은 경우, <a href="../training#train-with-pytorch-trainer">여기</a>에서 기본 튜토리얼을 확인하세요!</p></blockquote> <p data-svelte-h="svelte-188l4pk">이제 모델을 훈련시킬 준비가 되었습니다! <a href="/docs/transformers/main/ko/model_doc/auto#transformers.AutoModelForTokenClassification">AutoModelForTokenClassification</a>으로 DistilBERT를 가져오고 예상되는 레이블 수와 레이블 매핑을 지정하세요:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForTokenClassification, TrainingArguments, Trainer | |
| <span class="hljs-meta">>>> </span>model = AutoModelForTokenClassification.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"distilbert/distilbert-base-uncased"</span>, num_labels=<span class="hljs-number">13</span>, id2label=id2label, label2id=label2id | |
| <span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-14zzcxs">이제 세 단계만 거치면 끝입니다:</p> <ol data-svelte-h="svelte-1gzp7eg"><li><a href="/docs/transformers/main/ko/main_classes/trainer#transformers.TrainingArguments">TrainingArguments</a>에서 하이퍼파라미터를 정의하세요. <code>output_dir</code>는 모델을 저장할 위치를 지정하는 유일한 필수 매개변수입니다. 이 모델을 허브에 업로드하기 위해 <code>push_to_hub=True</code>를 설정합니다(모델을 업로드하기 위해 Hugging Face에 로그인해야 합니다). 각 에폭이 끝날 때마다, <a href="/docs/transformers/main/ko/main_classes/trainer#transformers.Trainer">Trainer</a>는 seqeval 점수를 평가하고 훈련 체크포인트를 저장합니다.</li> <li><a href="/docs/transformers/main/ko/main_classes/trainer#transformers.Trainer">Trainer</a>에 훈련 인수와 모델, 데이터 세트, 토크나이저, 데이터 콜레이터 및 <code>compute_metrics</code> 함수를 전달하세요.</li> <li><a href="/docs/transformers/main/ko/main_classes/trainer#transformers.Trainer.train">train()</a>를 호출하여 모델을 파인 튜닝하세요.</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; 
border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>training_args = TrainingArguments( | |
| <span class="hljs-meta">... </span> output_dir=<span class="hljs-string">"my_awesome_wnut_model"</span>, | |
| <span class="hljs-meta">... </span> learning_rate=<span class="hljs-number">2e-5</span>, | |
| <span class="hljs-meta">... </span> per_device_train_batch_size=<span class="hljs-number">16</span>, | |
| <span class="hljs-meta">... </span> per_device_eval_batch_size=<span class="hljs-number">16</span>, | |
| <span class="hljs-meta">... </span> num_train_epochs=<span class="hljs-number">2</span>, | |
| <span class="hljs-meta">... </span> weight_decay=<span class="hljs-number">0.01</span>, | |
| <span class="hljs-meta">... </span> eval_strategy=<span class="hljs-string">"epoch"</span>, | |
| <span class="hljs-meta">... </span> save_strategy=<span class="hljs-string">"epoch"</span>, | |
| <span class="hljs-meta">... </span> load_best_model_at_end=<span class="hljs-literal">True</span>, | |
| <span class="hljs-meta">... </span> push_to_hub=<span class="hljs-literal">True</span>, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>trainer = Trainer( | |
| <span class="hljs-meta">... </span> model=model, | |
| <span class="hljs-meta">... </span> args=training_args, | |
| <span class="hljs-meta">... </span> train_dataset=tokenized_wnut[<span class="hljs-string">"train"</span>], | |
| <span class="hljs-meta">... </span> eval_dataset=tokenized_wnut[<span class="hljs-string">"test"</span>], | |
| <span class="hljs-meta">... </span> processing_class=tokenizer, | |
| <span class="hljs-meta">... </span> data_collator=data_collator, | |
| <span class="hljs-meta">... </span> compute_metrics=compute_metrics, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-110p6vq">훈련이 완료되면, <a href="/docs/transformers/main/ko/main_classes/trainer#transformers.Trainer.push_to_hub">push_to_hub()</a> 메소드를 사용하여 모델을 허브에 공유할 수 있습니다.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>trainer.push_to_hub()<!-- HTML_TAG_END --></pre></div> <blockquote class="tip"><p data-svelte-h="svelte-139pooy">토큰 분류를 위한 모델을 파인 튜닝하는 자세한 예제는 다음 | |
| <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb" rel="nofollow">PyTorch notebook</a> | |
| 또는 <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification-tf.ipynb" rel="nofollow">TensorFlow notebook</a>를 참조하세요.</p></blockquote> <h2 class="relative group"><a id="inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>추론</span></h2> <p data-svelte-h="svelte-1r6hgyn">좋아요, 이제 모델을 파인 튜닝했으니 추론에 사용할 수 있습니다!</p> <p data-svelte-h="svelte-ej1eir">추론을 수행하고자 하는 텍스트를 가져와봅시다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path 
d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>text = <span class="hljs-string">"The Golden State Warriors are an American professional basketball team based in San Francisco."</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-bavprk">파인 튜닝된 모델로 추론을 시도하는 가장 간단한 방법은 <code>pipeline()</code>를 사용하는 것입니다. 모델로 NER의 <code>pipeline</code>을 인스턴스화하고, 텍스트를 전달해보세요:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black 
border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| <span class="hljs-meta">>>> </span>classifier = pipeline(<span class="hljs-string">"ner"</span>, model=<span class="hljs-string">"stevhliu/my_awesome_wnut_model"</span>) | |
| <span class="hljs-meta">>>> </span>classifier(text) | |
| [{<span class="hljs-string">'entity'</span>: <span class="hljs-string">'B-location'</span>, | |
| <span class="hljs-string">'score'</span>: <span class="hljs-number">0.42658573</span>, | |
| <span class="hljs-string">'index'</span>: <span class="hljs-number">2</span>, | |
| <span class="hljs-string">'word'</span>: <span class="hljs-string">'golden'</span>, | |
| <span class="hljs-string">'start'</span>: <span class="hljs-number">4</span>, | |
| <span class="hljs-string">'end'</span>: <span class="hljs-number">10</span>}, | |
| {<span class="hljs-string">'entity'</span>: <span class="hljs-string">'I-location'</span>, | |
| <span class="hljs-string">'score'</span>: <span class="hljs-number">0.35856336</span>, | |
| <span class="hljs-string">'index'</span>: <span class="hljs-number">3</span>, | |
| <span class="hljs-string">'word'</span>: <span class="hljs-string">'state'</span>, | |
| <span class="hljs-string">'start'</span>: <span class="hljs-number">11</span>, | |
| <span class="hljs-string">'end'</span>: <span class="hljs-number">16</span>}, | |
| {<span class="hljs-string">'entity'</span>: <span class="hljs-string">'B-group'</span>, | |
| <span class="hljs-string">'score'</span>: <span class="hljs-number">0.3064001</span>, | |
| <span class="hljs-string">'index'</span>: <span class="hljs-number">4</span>, | |
| <span class="hljs-string">'word'</span>: <span class="hljs-string">'warriors'</span>, | |
| <span class="hljs-string">'start'</span>: <span class="hljs-number">17</span>, | |
| <span class="hljs-string">'end'</span>: <span class="hljs-number">25</span>}, | |
| {<span class="hljs-string">'entity'</span>: <span class="hljs-string">'B-location'</span>, | |
| <span class="hljs-string">'score'</span>: <span class="hljs-number">0.65523505</span>, | |
| <span class="hljs-string">'index'</span>: <span class="hljs-number">13</span>, | |
| <span class="hljs-string">'word'</span>: <span class="hljs-string">'san'</span>, | |
| <span class="hljs-string">'start'</span>: <span class="hljs-number">80</span>, | |
| <span class="hljs-string">'end'</span>: <span class="hljs-number">83</span>}, | |
| {<span class="hljs-string">'entity'</span>: <span class="hljs-string">'B-location'</span>, | |
| <span class="hljs-string">'score'</span>: <span class="hljs-number">0.4668663</span>, | |
| <span class="hljs-string">'index'</span>: <span class="hljs-number">14</span>, | |
| <span class="hljs-string">'word'</span>: <span class="hljs-string">'francisco'</span>, | |
| <span class="hljs-string">'start'</span>: <span class="hljs-number">84</span>, | |
| <span class="hljs-string">'end'</span>: <span class="hljs-number">93</span>}]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1smgeha">원한다면, <code>pipeline</code>의 결과를 수동으로 복제할 수도 있습니다:</p> <p data-svelte-h="svelte-ctuaol">텍스트를 토큰화하고 PyTorch 텐서를 반환합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"stevhliu/my_awesome_wnut_model"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = tokenizer(text, return_tensors=<span class="hljs-string">"pt"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1hjuppo">입력을 모델에 전달하고 <code>logits</code>을 반환합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForTokenClassification | |
| <span class="hljs-meta">>>> </span>model = AutoModelForTokenClassification.from_pretrained(<span class="hljs-string">"stevhliu/my_awesome_wnut_model"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">with</span> torch.no_grad(): | |
| <span class="hljs-meta">... </span> logits = model(**inputs).logits<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1jbp04u">가장 높은 확률을 가진 클래스를 모델의 <code>id2label</code> 매핑을 사용하여 텍스트 레이블로 변환합니다:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>predictions = torch.argmax(logits, dim=<span class="hljs-number">2</span>) | |
| <span class="hljs-meta">>>> </span>predicted_token_class = [model.config.id2label[t.item()] <span class="hljs-keyword">for</span> t <span class="hljs-keyword">in</span> predictions[<span class="hljs-number">0</span>]] | |
| <span class="hljs-meta">>>> </span>predicted_token_class | |
| [<span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'B-location'</span>, | |
| <span class="hljs-string">'I-location'</span>, | |
| <span class="hljs-string">'B-group'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'B-location'</span>, | |
| <span class="hljs-string">'B-location'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>]<!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ko/tasks/token_classification.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
<script>
  {
    // Bootstrap for the client-side app. The block scope keeps the `const`
    // bindings below out of the global scope; only `__sveltekit_13maekw`
    // (assigned without a declaration) becomes a global.
    // NOTE(review): presumably the imported entry modules read this global
    // for their asset/base paths — confirm against the build output.
    __sveltekit_13maekw = {
      assets: "/docs/transformers/main/ko",
      base: "/docs/transformers/main/ko",
      env: {}
    };

    // `document.currentScript` must be read synchronously while this script
    // is executing, so the mount target is captured before any async work.
    const mountTarget = document.currentScript.parentElement;
    const hydrationData = [null, null];

    // Options handed to `kit.start`.
    // NOTE(review): node ids 0 and 175 appear to match the `nodes/0.*.js`
    // and `nodes/175.*.js` chunks preloaded in <head> — confirm.
    const startOptions = {
      node_ids: [0, 175],
      data: hydrationData,
      form: null,
      error: null
    };

    // Load both entry modules in parallel, then start the app on the
    // element that contains this <script>.
    Promise.all([
      import("/docs/transformers/main/ko/_app/immutable/entry/start.de34ae0b.js"),
      import("/docs/transformers/main/ko/_app/immutable/entry/app.54a1afb8.js")
    ]).then((loaded) => {
      const [kit, app] = loaded;
      kit.start(app, mountTarget, startOptions);
    });
  }
</script>
Xet Storage Details
- Size: 74.9 kB
- Xet hash: 291a657362bc4f800a3d5cd86637a1325187308b6adc9fdbfb13a560051fed2e
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.