Buckets:

hf-doc-build/doc / transformers /main /ja /tasks /token_classification.html
HuggingFaceDocBuilder's picture
download
raw
76.1 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Token classification&quot;,&quot;local&quot;:&quot;token-classification&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Load WNUT 17 dataset&quot;,&quot;local&quot;:&quot;load-wnut-17-dataset&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Preprocess&quot;,&quot;local&quot;:&quot;preprocess&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Evaluate&quot;,&quot;local&quot;:&quot;evaluate&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Train&quot;,&quot;local&quot;:&quot;train&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inference&quot;,&quot;local&quot;:&quot;inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/main/ja/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/entry/start.b596590e.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/scheduler.0c7da001.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/singletons.11a17a36.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/paths.351f5b4a.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/entry/app.ede2e8a7.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/preload-helper.68965f5e.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/index.e0be4b2b.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/nodes/0.0325648c.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/nodes/144.b0268842.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/Tip.f6e0bcd3.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/CopyLLMTxtMenu.6767f26f.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/globals.7f7f1b26.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.27cfd80f.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/Youtube.cf53141f.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/CodeBlock.418ebe13.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/DocNotebookDropdown.367a2ff8.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Token classification&quot;,&quot;local&quot;:&quot;token-classification&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Load WNUT 17 dataset&quot;,&quot;local&quot;:&quot;load-wnut-17-dataset&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Preprocess&quot;,&quot;local&quot;:&quot;preprocess&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Evaluate&quot;,&quot;local&quot;:&quot;evaluate&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Train&quot;,&quot;local&quot;:&quot;train&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inference&quot;,&quot;local&quot;:&quot;inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <div class="flex space-x-1 " style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <h1 class="relative group"><a id="token-classification" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#token-classification"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Token classification</span></h1> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/wVHdVlPScxA" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-1pzr9oy">トークン分類では、文内の個々のトークンにラベルを割り当てます。最も一般的なトークン分類タスクの 1 つは、固有表現認識 (NER) です。 NER は、人、場所、組織など、文内の各エンティティのラベルを見つけようとします。</p> <p data-svelte-h="svelte-w5jzhi">このガイドでは、次の方法を説明します。</p> <ol data-svelte-h="svelte-s4jtml"><li><a href="https://huggingface.co/datasets/wnut_17" rel="nofollow">WNUT 17</a> データセットで <a href="https://huggingface.co/distilbert/distilbert-base-uncased" rel="nofollow">DistilBERT</a> を微調整して、新しいエンティティを検出します。</li> <li>微調整されたモデルを推論に使用します。</li></ol> <blockquote class="tip"><p data-svelte-h="svelte-62918c">このタスクと互換性のあるすべてのアーキテクチャとチェックポイントを確認するには、<a href="https://huggingface.co/tasks/token-classification" rel="nofollow">タスクページ</a> を確認することをお勧めします。</p></blockquote> <p data-svelte-h="svelte-1lya3k8">始める前に、必要なライブラリがすべてインストールされていることを確認してください。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-bash "><!-- HTML_TAG_START -->pip install transformers datasets evaluate seqeval<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-193zy02">モデルをアップロードしてコミュニティと共有できるように、Hugging Face アカウントにログインすることをお勧めします。プロンプトが表示されたら、トークンを入力してログインします。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login
<span class="hljs-meta">&gt;&gt;&gt; </span>notebook_login()<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="load-wnut-17-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#load-wnut-17-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Load WNUT 17 dataset</span></h2> <p data-svelte-h="svelte-l7eyn2">まず、🤗 データセット ライブラリから WNUT 17 データセットをロードします。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>wnut = load_dataset(<span class="hljs-string">&quot;wnut_17&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1r6oj5w">次に、例を見てみましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>wnut[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-number">0</span>]
{<span class="hljs-string">&#x27;id&#x27;</span>: <span class="hljs-string">&#x27;0&#x27;</span>,
<span class="hljs-string">&#x27;ner_tags&#x27;</span>: [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">7</span>, <span class="hljs-number">8</span>, <span class="hljs-number">8</span>, <span class="hljs-number">0</span>, <span class="hljs-number">7</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
<span class="hljs-string">&#x27;tokens&#x27;</span>: [<span class="hljs-string">&#x27;@paulwalk&#x27;</span>, <span class="hljs-string">&#x27;It&#x27;</span>, <span class="hljs-string">&quot;&#x27;s&quot;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;view&#x27;</span>, <span class="hljs-string">&#x27;from&#x27;</span>, <span class="hljs-string">&#x27;where&#x27;</span>, <span class="hljs-string">&#x27;I&#x27;</span>, <span class="hljs-string">&quot;&#x27;m&quot;</span>, <span class="hljs-string">&#x27;living&#x27;</span>, <span class="hljs-string">&#x27;for&#x27;</span>, <span class="hljs-string">&#x27;two&#x27;</span>, <span class="hljs-string">&#x27;weeks&#x27;</span>, <span class="hljs-string">&#x27;.&#x27;</span>, <span class="hljs-string">&#x27;Empire&#x27;</span>, <span class="hljs-string">&#x27;State&#x27;</span>, <span class="hljs-string">&#x27;Building&#x27;</span>, <span class="hljs-string">&#x27;=&#x27;</span>, <span class="hljs-string">&#x27;ESB&#x27;</span>, <span class="hljs-string">&#x27;.&#x27;</span>, <span class="hljs-string">&#x27;Pretty&#x27;</span>, <span class="hljs-string">&#x27;bad&#x27;</span>, <span class="hljs-string">&#x27;storm&#x27;</span>, <span class="hljs-string">&#x27;here&#x27;</span>, <span class="hljs-string">&#x27;last&#x27;</span>, <span class="hljs-string">&#x27;evening&#x27;</span>, <span class="hljs-string">&#x27;.&#x27;</span>]
}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-oegc1b"><code>ner_tags</code>内の各数字はエンティティを表します。数値をラベル名に変換して、エンティティが何であるかを調べます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>label_list = wnut[<span class="hljs-string">&quot;train&quot;</span>].features[<span class="hljs-string">f&quot;ner_tags&quot;</span>].feature.names
<span class="hljs-meta">&gt;&gt;&gt; </span>label_list
[
<span class="hljs-string">&quot;O&quot;</span>,
<span class="hljs-string">&quot;B-corporation&quot;</span>,
<span class="hljs-string">&quot;I-corporation&quot;</span>,
<span class="hljs-string">&quot;B-creative-work&quot;</span>,
<span class="hljs-string">&quot;I-creative-work&quot;</span>,
<span class="hljs-string">&quot;B-group&quot;</span>,
<span class="hljs-string">&quot;I-group&quot;</span>,
<span class="hljs-string">&quot;B-location&quot;</span>,
<span class="hljs-string">&quot;I-location&quot;</span>,
<span class="hljs-string">&quot;B-person&quot;</span>,
<span class="hljs-string">&quot;I-person&quot;</span>,
<span class="hljs-string">&quot;B-product&quot;</span>,
<span class="hljs-string">&quot;I-product&quot;</span>,
]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ojxi8i"><code>ner_tag</code> の前に付く文字は、エンティティのトークンの位置を示します。</p> <ul data-svelte-h="svelte-84dn3u"><li><code>B-</code> はエンティティの始まりを示します。</li> <li><code>I-</code> は、トークンが同じエンティティ内に含まれていることを示します (たとえば、<code>State</code> トークンは次のようなエンティティの一部です)
<code>Empire State Building</code>)。</li> <li><code>0</code> は、トークンがどのエンティティにも対応しないことを示します。</li></ul> <h2 class="relative group"><a id="preprocess" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#preprocess"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Preprocess</span></h2> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/iY2AZYdZAr0" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-cnh8zx">次のステップでは、DistilBERT トークナイザーをロードして<code>tokens</code>フィールドを前処理します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;distilbert/distilbert-base-uncased&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1iw1brp">上の <code>tokens</code>フィールドの例で見たように、入力はすでにトークン化されているようです。しかし、実際には入力はまだトークン化されていないため、単語をサブワードにトークン化するには<code>is_split_into_words=True</code> を設定する必要があります。例えば:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>example = wnut[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-number">0</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenized_input = tokenizer(example[<span class="hljs-string">&quot;tokens&quot;</span>], is_split_into_words=<span class="hljs-literal">True</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>tokens = tokenizer.convert_ids_to_tokens(tokenized_input[<span class="hljs-string">&quot;input_ids&quot;</span>])
<span class="hljs-meta">&gt;&gt;&gt; </span>tokens
[<span class="hljs-string">&#x27;[CLS]&#x27;</span>, <span class="hljs-string">&#x27;@&#x27;</span>, <span class="hljs-string">&#x27;paul&#x27;</span>, <span class="hljs-string">&#x27;##walk&#x27;</span>, <span class="hljs-string">&#x27;it&#x27;</span>, <span class="hljs-string">&quot;&#x27;&quot;</span>, <span class="hljs-string">&#x27;s&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;view&#x27;</span>, <span class="hljs-string">&#x27;from&#x27;</span>, <span class="hljs-string">&#x27;where&#x27;</span>, <span class="hljs-string">&#x27;i&#x27;</span>, <span class="hljs-string">&quot;&#x27;&quot;</span>, <span class="hljs-string">&#x27;m&#x27;</span>, <span class="hljs-string">&#x27;living&#x27;</span>, <span class="hljs-string">&#x27;for&#x27;</span>, <span class="hljs-string">&#x27;two&#x27;</span>, <span class="hljs-string">&#x27;weeks&#x27;</span>, <span class="hljs-string">&#x27;.&#x27;</span>, <span class="hljs-string">&#x27;empire&#x27;</span>, <span class="hljs-string">&#x27;state&#x27;</span>, <span class="hljs-string">&#x27;building&#x27;</span>, <span class="hljs-string">&#x27;=&#x27;</span>, <span class="hljs-string">&#x27;es&#x27;</span>, <span class="hljs-string">&#x27;##b&#x27;</span>, <span class="hljs-string">&#x27;.&#x27;</span>, <span class="hljs-string">&#x27;pretty&#x27;</span>, <span class="hljs-string">&#x27;bad&#x27;</span>, <span class="hljs-string">&#x27;storm&#x27;</span>, <span class="hljs-string">&#x27;here&#x27;</span>, <span class="hljs-string">&#x27;last&#x27;</span>, <span class="hljs-string">&#x27;evening&#x27;</span>, <span class="hljs-string">&#x27;.&#x27;</span>, <span class="hljs-string">&#x27;[SEP]&#x27;</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-5b435g">ただし、これによりいくつかの特別なトークン <code>[CLS]</code><code>[SEP]</code> が追加され、サブワードのトークン化により入力とラベルの間に不一致が生じます。 1 つのラベルに対応する 1 つの単語を 2 つのサブワードに分割できるようになりました。次の方法でトークンとラベルを再調整する必要があります。</p> <ol data-svelte-h="svelte-11v861c"><li><a href="https://huggingface.co/docs/transformers/main_classes/tokenizer#transformers.BatchEncoding.word_ids" rel="nofollow"><code>word_ids</code></a> メソッドを使用して、すべてのトークンを対応する単語にマッピングします。</li> <li>特別なトークン <code>[CLS]</code><code>[SEP]</code> にラベル <code>-100</code> を割り当て、それらが PyTorch 損失関数によって無視されるようにします (<a href="https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html" rel="nofollow">CrossEntropyLoss</a>)。</li> <li>特定の単語の最初のトークンのみにラベルを付けます。同じ単語の他のサブトークンに <code>-100</code>を割り当てます。</li></ol> <p data-svelte-h="svelte-173aflj">トークンとラベルを再調整し、シーケンスを DistilBERT の最大入力長以下に切り詰める関数を作成する方法を次に示します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">tokenize_and_align_labels</span>(<span class="hljs-params">examples</span>):
<span class="hljs-meta">... </span> tokenized_inputs = tokenizer(examples[<span class="hljs-string">&quot;tokens&quot;</span>], truncation=<span class="hljs-literal">True</span>, is_split_into_words=<span class="hljs-literal">True</span>)
<span class="hljs-meta">... </span> labels = []
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> i, label <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(examples[<span class="hljs-string">f&quot;ner_tags&quot;</span>]):
<span class="hljs-meta">... </span> word_ids = tokenized_inputs.word_ids(batch_index=i) <span class="hljs-comment"># Map tokens to their respective word.</span>
<span class="hljs-meta">... </span> previous_word_idx = <span class="hljs-literal">None</span>
<span class="hljs-meta">... </span> label_ids = []
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> word_idx <span class="hljs-keyword">in</span> word_ids: <span class="hljs-comment"># Set the special tokens to -100.</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> word_idx <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>:
<span class="hljs-meta">... </span> label_ids.append(-<span class="hljs-number">100</span>)
<span class="hljs-meta">... </span> <span class="hljs-keyword">elif</span> word_idx != previous_word_idx: <span class="hljs-comment"># Only label the first token of a given word.</span>
<span class="hljs-meta">... </span> label_ids.append(label[word_idx])
<span class="hljs-meta">... </span> <span class="hljs-keyword">else</span>:
<span class="hljs-meta">... </span> label_ids.append(-<span class="hljs-number">100</span>)
<span class="hljs-meta">... </span> previous_word_idx = word_idx
<span class="hljs-meta">... </span> labels.append(label_ids)
<span class="hljs-meta">... </span> tokenized_inputs[<span class="hljs-string">&quot;labels&quot;</span>] = labels
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> tokenized_inputs<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1rpvj99">データセット全体に前処理関数を適用するには、🤗 Datasets <code>map</code> 関数を使用します。 <code>batched=True</code> を設定してデータセットの複数の要素を一度に処理することで、<code>map</code> 関数を高速化できます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>tokenized_wnut = wnut.<span class="hljs-built_in">map</span>(tokenize_and_align_labels, batched=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-qd498c">次に、<code>DataCollat​​orWithPadding</code> を使用してサンプルのバッチを作成します。データセット全体を最大長までパディングするのではなく、照合中にバッチ内の最長の長さまで文を <em>動的にパディング</em> する方が効率的です。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DataCollatorForTokenClassification
<span class="hljs-meta">&gt;&gt;&gt; </span>data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="evaluate" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#evaluate"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Evaluate</span></h2> <p data-svelte-h="svelte-1ym9fv4">トレーニング中にメトリクスを含めると、多くの場合、モデルのパフォーマンスを評価するのに役立ちます。 🤗 <a href="https://huggingface.co/docs/evaluate/index" rel="nofollow">Evaluate</a> ライブラリを使用して、評価メソッドをすばやくロードできます。このタスクでは、<a href="https://huggingface.co/spaces/evaluate-metric/seqeval" rel="nofollow">seqeval</a> フレームワークを読み込みます (🤗 Evaluate <a href="https://huggingface.co/docs/evaluate/a_quick_tour" rel="nofollow">クイック ツアー</a> を参照してください) ) メトリクスの読み込みと計算の方法について詳しくは、こちらをご覧ください)。 Seqeval は実際に、精度、再現率、F1、精度などのいくつかのスコアを生成します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> evaluate
<span class="hljs-meta">&gt;&gt;&gt; </span>seqeval = evaluate.load(<span class="hljs-string">&quot;seqeval&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1wshevw">まず NER ラベルを取得してから、真の予測と真のラベルを <code>compute</code> に渡してスコアを計算する関数を作成します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-meta">&gt;&gt;&gt; </span>labels = [label_list[i] <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> example[<span class="hljs-string">f&quot;ner_tags&quot;</span>]]
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">compute_metrics</span>(<span class="hljs-params">p</span>):
<span class="hljs-meta">... </span> predictions, labels = p
<span class="hljs-meta">... </span> predictions = np.argmax(predictions, axis=<span class="hljs-number">2</span>)
<span class="hljs-meta">... </span> true_predictions = [
<span class="hljs-meta">... </span> [label_list[p] <span class="hljs-keyword">for</span> (p, l) <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(prediction, label) <span class="hljs-keyword">if</span> l != -<span class="hljs-number">100</span>]
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> prediction, label <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(predictions, labels)
<span class="hljs-meta">... </span> ]
<span class="hljs-meta">... </span> true_labels = [
<span class="hljs-meta">... </span> [label_list[l] <span class="hljs-keyword">for</span> (p, l) <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(prediction, label) <span class="hljs-keyword">if</span> l != -<span class="hljs-number">100</span>]
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> prediction, label <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(predictions, labels)
<span class="hljs-meta">... </span> ]
<span class="hljs-meta">... </span> results = seqeval.compute(predictions=true_predictions, references=true_labels)
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> {
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;precision&quot;</span>: results[<span class="hljs-string">&quot;overall_precision&quot;</span>],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;recall&quot;</span>: results[<span class="hljs-string">&quot;overall_recall&quot;</span>],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;f1&quot;</span>: results[<span class="hljs-string">&quot;overall_f1&quot;</span>],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;accuracy&quot;</span>: results[<span class="hljs-string">&quot;overall_accuracy&quot;</span>],
<span class="hljs-meta">... </span> }<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-18cw5xr">これで<code>compute_metrics</code>関数の準備が整いました。トレーニングをセットアップするときにこの関数に戻ります。</p> <h2 class="relative group"><a id="train" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#train"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Train</span></h2> <p data-svelte-h="svelte-i6dg18">モデルのトレーニングを開始する前に、<code>id2label</code><code>label2id</code>を使用して、予想される ID とそのラベルのマップを作成します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>id2label = {
<span class="hljs-meta">... </span> <span class="hljs-number">0</span>: <span class="hljs-string">&quot;O&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-number">1</span>: <span class="hljs-string">&quot;B-corporation&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-number">2</span>: <span class="hljs-string">&quot;I-corporation&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-number">3</span>: <span class="hljs-string">&quot;B-creative-work&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-number">4</span>: <span class="hljs-string">&quot;I-creative-work&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-number">5</span>: <span class="hljs-string">&quot;B-group&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-number">6</span>: <span class="hljs-string">&quot;I-group&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-number">7</span>: <span class="hljs-string">&quot;B-location&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-number">8</span>: <span class="hljs-string">&quot;I-location&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-number">9</span>: <span class="hljs-string">&quot;B-person&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-number">10</span>: <span class="hljs-string">&quot;I-person&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-number">11</span>: <span class="hljs-string">&quot;B-product&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-number">12</span>: <span class="hljs-string">&quot;I-product&quot;</span>,
<span class="hljs-meta">... </span>}
<span class="hljs-meta">&gt;&gt;&gt; </span>label2id = {
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;O&quot;</span>: <span class="hljs-number">0</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;B-corporation&quot;</span>: <span class="hljs-number">1</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;I-corporation&quot;</span>: <span class="hljs-number">2</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;B-creative-work&quot;</span>: <span class="hljs-number">3</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;I-creative-work&quot;</span>: <span class="hljs-number">4</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;B-group&quot;</span>: <span class="hljs-number">5</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;I-group&quot;</span>: <span class="hljs-number">6</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;B-location&quot;</span>: <span class="hljs-number">7</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;I-location&quot;</span>: <span class="hljs-number">8</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;B-person&quot;</span>: <span class="hljs-number">9</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;I-person&quot;</span>: <span class="hljs-number">10</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;B-product&quot;</span>: <span class="hljs-number">11</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;I-product&quot;</span>: <span class="hljs-number">12</span>,
<span class="hljs-meta">... </span>}<!-- HTML_TAG_END --></pre></div> <blockquote class="tip"><p data-svelte-h="svelte-1ubngji"><a href="/docs/transformers/main/ja/main_classes/trainer#transformers.Trainer">Trainer</a> を使用したモデルの微調整に慣れていない場合は、<a href="../training#train-with-pytorch-trainer">ここ</a> の基本的なチュートリアルをご覧ください。</p></blockquote> <p data-svelte-h="svelte-1vtne9l">これでモデルのトレーニングを開始する準備が整いました。 <a href="/docs/transformers/main/ja/model_doc/auto#transformers.AutoModelForTokenClassification">AutoModelForTokenClassification</a> を使用して、予期されるラベルの数とラベル マッピングを指定して DistilBERT を読み込みます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForTokenClassification, TrainingArguments, Trainer
<span class="hljs-meta">&gt;&gt;&gt; </span>model = AutoModelForTokenClassification.from_pretrained(
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;distilbert/distilbert-base-uncased&quot;</span>, num_labels=<span class="hljs-number">13</span>, id2label=id2label, label2id=label2id
<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-5p19xw">この時点で残っているステップは 3 つだけです。</p> <ol data-svelte-h="svelte-renpej"><li><a href="/docs/transformers/main/ja/main_classes/trainer#transformers.TrainingArguments">TrainingArguments</a> でトレーニング ハイパーパラメータを定義します。唯一の必須パラメータは、モデルの保存場所を指定する <code>output_dir</code> です。 <code>push_to_hub=True</code>を設定して、このモデルをハブにプッシュします (モデルをアップロードするには、Hugging Face にサインインする必要があります)。各エポックの終了時に、<a href="/docs/transformers/main/ja/main_classes/trainer#transformers.Trainer">Trainer</a> は連続スコアを評価し、トレーニング チェックポイントを保存します。</li> <li>トレーニング引数を、モデル、データセット、トークナイザー、データ照合器、および <code>compute_metrics</code> 関数とともに <a href="/docs/transformers/main/ja/main_classes/trainer#transformers.Trainer">Trainer</a> に渡します。</li> <li><a href="/docs/transformers/main/ja/main_classes/trainer#transformers.Trainer.train">train()</a> を呼び出してモデルを微調整します。</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>training_args = TrainingArguments(
<span class="hljs-meta">... </span> output_dir=<span class="hljs-string">&quot;my_awesome_wnut_model&quot;</span>,
<span class="hljs-meta">... </span> learning_rate=<span class="hljs-number">2e-5</span>,
<span class="hljs-meta">... </span> per_device_train_batch_size=<span class="hljs-number">16</span>,
<span class="hljs-meta">... </span> per_device_eval_batch_size=<span class="hljs-number">16</span>,
<span class="hljs-meta">... </span> num_train_epochs=<span class="hljs-number">2</span>,
<span class="hljs-meta">... </span> weight_decay=<span class="hljs-number">0.01</span>,
<span class="hljs-meta">... </span> eval_strategy=<span class="hljs-string">&quot;epoch&quot;</span>,
<span class="hljs-meta">... </span> save_strategy=<span class="hljs-string">&quot;epoch&quot;</span>,
<span class="hljs-meta">... </span> load_best_model_at_end=<span class="hljs-literal">True</span>,
<span class="hljs-meta">... </span> push_to_hub=<span class="hljs-literal">True</span>,
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>trainer = Trainer(
<span class="hljs-meta">... </span> model=model,
<span class="hljs-meta">... </span> args=training_args,
<span class="hljs-meta">... </span> train_dataset=tokenized_wnut[<span class="hljs-string">&quot;train&quot;</span>],
<span class="hljs-meta">... </span> eval_dataset=tokenized_wnut[<span class="hljs-string">&quot;test&quot;</span>],
<span class="hljs-meta">... </span> processing_class=tokenizer,
<span class="hljs-meta">... </span> data_collator=data_collator,
<span class="hljs-meta">... </span> compute_metrics=compute_metrics,
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ngexm3">トレーニングが完了したら、 <a href="/docs/transformers/main/ja/main_classes/trainer#transformers.Trainer.push_to_hub">push_to_hub()</a> メソッドを使用してモデルをハブに共有し、誰もがモデルを使用できるようにします。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>trainer.push_to_hub()<!-- HTML_TAG_END --></pre></div> <blockquote class="tip"><p data-svelte-h="svelte-bbxcky">トークン分類のモデルを微調整する方法のより詳細な例については、対応するセクションを参照してください。
<a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb" rel="nofollow">PyTorch ノートブック</a>
または <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification-tf.ipynb" rel="nofollow">TensorFlow ノートブック</a></p></blockquote> <h2 class="relative group"><a id="inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference</span></h2> <p data-svelte-h="svelte-cyrfc8">モデルを微調整したので、それを推論に使用できるようになりました。</p> <p data-svelte-h="svelte-tzt2ke">推論を実行したいテキストをいくつか取得します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>text = <span class="hljs-string">&quot;The Golden State Warriors are an American professional basketball team based in San Francisco.&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-pzwrt3">推論用に微調整されたモデルを試す最も簡単な方法は、それを <a href="/docs/transformers/main/ja/main_classes/pipelines#transformers.pipeline">pipeline()</a> で使用することです。モデルを使用して NER の<code>pipeline</code>をインスタンス化し、テキストをそれに渡します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>classifier = pipeline(<span class="hljs-string">&quot;ner&quot;</span>, model=<span class="hljs-string">&quot;stevhliu/my_awesome_wnut_model&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>classifier(text)
[{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;B-location&#x27;</span>,
<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.42658573</span>,
<span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">2</span>,
<span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;golden&#x27;</span>,
<span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">4</span>,
<span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">10</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;I-location&#x27;</span>,
<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.35856336</span>,
<span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">3</span>,
<span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;state&#x27;</span>,
<span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">11</span>,
<span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">16</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;B-group&#x27;</span>,
<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.3064001</span>,
<span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">4</span>,
<span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;warriors&#x27;</span>,
<span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">17</span>,
<span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">25</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;B-location&#x27;</span>,
<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.65523505</span>,
<span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">13</span>,
<span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;san&#x27;</span>,
<span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">80</span>,
<span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">83</span>},
{<span class="hljs-string">&#x27;entity&#x27;</span>: <span class="hljs-string">&#x27;B-location&#x27;</span>,
<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.4668663</span>,
<span class="hljs-string">&#x27;index&#x27;</span>: <span class="hljs-number">14</span>,
<span class="hljs-string">&#x27;word&#x27;</span>: <span class="hljs-string">&#x27;francisco&#x27;</span>,
<span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">84</span>,
<span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">93</span>}]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-p649vi">必要に応じて、<code>pipeline</code>の結果を手動で複製することもできます。</p> <p data-svelte-h="svelte-1n0k6or">テキストをトークン化して PyTorch テンソルを返します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;stevhliu/my_awesome_wnut_model&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>inputs = tokenizer(text, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1rvunpz">入力をモデルに渡し、<code>logits</code>を返します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForTokenClassification
<span class="hljs-meta">&gt;&gt;&gt; </span>model = AutoModelForTokenClassification.from_pretrained(<span class="hljs-string">&quot;stevhliu/my_awesome_wnut_model&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-meta">... </span> logits = model(**inputs).logits<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-37ihfr">最も高い確率でクラスを取得し、モデルの <code>id2label</code> マッピングを使用してそれをテキスト ラベルに変換します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>predictions = torch.argmax(logits, dim=<span class="hljs-number">2</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>predicted_token_class = [model.config.id2label[t.item()] <span class="hljs-keyword">for</span> t <span class="hljs-keyword">in</span> predictions[<span class="hljs-number">0</span>]]
<span class="hljs-meta">&gt;&gt;&gt; </span>predicted_token_class
[<span class="hljs-string">&#x27;O&#x27;</span>,
<span class="hljs-string">&#x27;O&#x27;</span>,
<span class="hljs-string">&#x27;B-location&#x27;</span>,
<span class="hljs-string">&#x27;I-location&#x27;</span>,
<span class="hljs-string">&#x27;B-group&#x27;</span>,
<span class="hljs-string">&#x27;O&#x27;</span>,
<span class="hljs-string">&#x27;O&#x27;</span>,
<span class="hljs-string">&#x27;O&#x27;</span>,
<span class="hljs-string">&#x27;O&#x27;</span>,
<span class="hljs-string">&#x27;O&#x27;</span>,
<span class="hljs-string">&#x27;O&#x27;</span>,
<span class="hljs-string">&#x27;O&#x27;</span>,
<span class="hljs-string">&#x27;O&#x27;</span>,
<span class="hljs-string">&#x27;B-location&#x27;</span>,
<span class="hljs-string">&#x27;B-location&#x27;</span>,
<span class="hljs-string">&#x27;O&#x27;</span>,
<span class="hljs-string">&#x27;O&#x27;</span>]<!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ja/tasks/token_classification.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_1e3a2vv = {
assets: "/docs/transformers/main/ja",
base: "/docs/transformers/main/ja",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/transformers/main/ja/_app/immutable/entry/start.b596590e.js"),
import("/docs/transformers/main/ja/_app/immutable/entry/app.ede2e8a7.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 144],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
76.1 kB
·
Xet hash:
a5c9f9b539052c65439b933b7847899f2dc29ef95d2ba7f70c7e8732f0914234

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.