Buckets:

hf-doc-build/doc / transformers /main /ja /preprocessing.html
HuggingFaceDocBuilder's picture
download
raw
121 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Preprocess&quot;,&quot;local&quot;:&quot;preprocess&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Natural Language Processing&quot;,&quot;local&quot;:&quot;natural-language-processing&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Pad&quot;,&quot;local&quot;:&quot;pad&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Truncation&quot;,&quot;local&quot;:&quot;truncation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Build tensors&quot;,&quot;local&quot;:&quot;build-tensors&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Audio&quot;,&quot;local&quot;:&quot;audio&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Computer Vision&quot;,&quot;local&quot;:&quot;computer-vision&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Pad&quot;,&quot;local&quot;:&quot;pad&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Multi Modal&quot;,&quot;local&quot;:&quot;multi-modal&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/main/ja/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/entry/start.18e2cf21.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/scheduler.0c7da001.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/singletons.327967ea.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/paths.bdfd8a0f.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/entry/app.262d8b88.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/preload-helper.68965f5e.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/index.e0be4b2b.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/nodes/0.689dfb4a.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/nodes/122.92d44ca2.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/Tip.f6e0bcd3.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/CopyLLMTxtMenu.6767f26f.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/globals.7f7f1b26.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.27cfd80f.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/Youtube.cf53141f.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/CodeBlock.418ebe13.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/DocNotebookDropdown.367a2ff8.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Preprocess&quot;,&quot;local&quot;:&quot;preprocess&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Natural Language Processing&quot;,&quot;local&quot;:&quot;natural-language-processing&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Pad&quot;,&quot;local&quot;:&quot;pad&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Truncation&quot;,&quot;local&quot;:&quot;truncation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Build tensors&quot;,&quot;local&quot;:&quot;build-tensors&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Audio&quot;,&quot;local&quot;:&quot;audio&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Computer Vision&quot;,&quot;local&quot;:&quot;computer-vision&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Pad&quot;,&quot;local&quot;:&quot;pad&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Multi Modal&quot;,&quot;local&quot;:&quot;multi-modal&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <div class="flex space-x-1 " style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <h1 class="relative group"><a id="preprocess" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#preprocess"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Preprocess</span></h1> <p data-svelte-h="svelte-1ubw5gq">データセットでモデルをトレーニングする前に、それをモデルの期待する入力形式に前処理する必要があります。
データがテキスト、画像、またはオーディオであるかどうかにかかわらず、それらはテンソルのバッチに変換して組み立てる必要があります。
🤗 Transformersは、データをモデル用に準備するのに役立つ前処理クラスのセットを提供しています。
このチュートリアルでは、次のことを学びます:</p> <ul data-svelte-h="svelte-11wptdl"><li>テキストの場合、<a href="./main_classes/tokenizer">Tokenizer</a>を使用してテキストをトークンのシーケンスに変換し、トークンの数値表現を作成し、それらをテンソルに組み立てる方法。</li> <li>音声とオーディオの場合、<a href="./main_classes/feature_extractor">Feature extractor</a>を使用してオーディオ波形から連続的な特徴を抽出し、それらをテンソルに変換する方法。</li> <li>画像入力の場合、<a href="./main_classes/image">ImageProcessor</a>を使用して画像をテンソルに変換する方法。</li> <li>マルチモーダル入力の場合、<a href="./main_classes/processors">Processor</a>を使用してトークナイザと特徴抽出器または画像プロセッサを組み合わせる方法。</li></ul> <blockquote class="tip"><p data-svelte-h="svelte-1gtevip"><code>AutoProcessor</code>は常に動作し、使用するモデルに適切なクラスを自動的に選択します。
トークナイザ、画像プロセッサ、特徴抽出器、またはプロセッサを使用しているかにかかわらず、動作します。</p></blockquote> <p data-svelte-h="svelte-otm9x2">始める前に、🤗 Datasetsをインストールして、いくつかのデータセットを試すことができるようにしてください:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-bash "><!-- HTML_TAG_START -->pip install datasets<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="natural-language-processing" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#natural-language-processing"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Natural Language Processing</span></h2> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/Yffk5aydLzg" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-129ogiy">テキストデータの前処理に使用する主要なツールは、<a href="main_classes/tokenizer">トークナイザ</a>です。トークナイザは、一連のルールに従ってテキストを<em>トークン</em>に分割します。トークンは数値に変換され、その後テンソルに変換され、モデルの入力となります。モデルが必要とする追加の入力は、トークナイザによって追加されます。</p> <blockquote class="tip"><p data-svelte-h="svelte-1mvkm08">事前学習済みモデルを使用する予定の場合、関連する事前学習済みトークナイザを使用することが重要です。これにより、テキストが事前学習コーパスと同じ方法で分割され、事前学習中に通常<em>ボキャブ</em>として参照される対応するトークンインデックスを使用します。</p></blockquote> <p data-svelte-h="svelte-j0cccz"><a href="/docs/transformers/main/ja/model_doc/auto#transformers.AutoTokenizer.from_pretrained">AutoTokenizer.from_pretrained()</a>メソッドを使用して事前学習済みトークナイザをロードして、開始しましょう。これにより、モデルが事前学習された<em>ボキャブ</em>がダウンロードされます:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;google-bert/bert-base-cased&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-8hvk2m">次に、テキストをトークナイザに渡します:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>encoded_input = tokenizer(<span class="hljs-string">&quot;Do not meddle in the affairs of wizards, for they are subtle and quick to anger.&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(encoded_input)
{<span class="hljs-string">&#x27;input_ids&#x27;</span>: [<span class="hljs-number">101</span>, <span class="hljs-number">2079</span>, <span class="hljs-number">2025</span>, <span class="hljs-number">19960</span>, <span class="hljs-number">10362</span>, <span class="hljs-number">1999</span>, <span class="hljs-number">1996</span>, <span class="hljs-number">3821</span>, <span class="hljs-number">1997</span>, <span class="hljs-number">16657</span>, <span class="hljs-number">1010</span>, <span class="hljs-number">2005</span>, <span class="hljs-number">2027</span>, <span class="hljs-number">2024</span>, <span class="hljs-number">11259</span>, <span class="hljs-number">1998</span>, <span class="hljs-number">4248</span>, <span class="hljs-number">2000</span>, <span class="hljs-number">4963</span>, <span class="hljs-number">1012</span>, <span class="hljs-number">102</span>],
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-n8z1l8">トークナイザは、重要な3つの項目を持つ辞書を返します:</p> <ul data-svelte-h="svelte-wfdruk"><li><a href="glossary#input-ids">input_ids</a> は文中の各トークンに対応するインデックスです。</li> <li><a href="glossary#attention-mask">attention_mask</a> はトークンがアテンションを受ける必要があるかどうかを示します。</li> <li><a href="glossary#token-type-ids">token_type_ids</a> は複数のシーケンスがある場合、トークンがどのシーケンスに属しているかを識別します。</li></ul> <p data-svelte-h="svelte-29z9e2"><code>input_ids</code> をデコードして入力を返します:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer.decode(encoded_input[<span class="hljs-string">&quot;input_ids&quot;</span>])
<span class="hljs-string">&#x27;[CLS] 魔法使いの事に干渉するな、彼らは微妙で怒りっぽい。 [SEP]&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1w1usd3">如何にお分かりいただけるかと思いますが、トークナイザはこの文章に2つの特別なトークン、<code>CLS</code>(クラシファイア)と<code>SEP</code>(セパレータ)を追加しました。
すべてのモデルが特別なトークンを必要とするわけではありませんが、必要な場合、トークナイザは自動的にそれらを追加します。</p> <p data-svelte-h="svelte-14un32q">複数の文章を前処理する場合、トークナイザにリストとして渡してください:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>batch_sentences = [
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;But what about second breakfast?&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;Don&#x27;t think he knows about second breakfast, Pip.&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;What about elevensies?&quot;</span>,
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_inputs = tokenizer(batch_sentences)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(encoded_inputs)
{<span class="hljs-string">&#x27;input_ids&#x27;</span>: [[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>]],
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: [[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]],
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: [[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]]}<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="pad" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pad"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Pad</span></h3> <p data-svelte-h="svelte-11zrsfs">文章は常に同じ長さではないことがあり、これはテンソル(モデルの入力)が均一な形状を持つ必要があるため問題となります。
パディングは、短い文に特別な「パディングトークン」を追加して、テンソルを長いシーケンスに合わせるための戦略です。</p> <p data-svelte-h="svelte-1bjh3we">バッチ内の短いシーケンスを最長のシーケンスに合わせるために、<code>padding</code>パラメータを<code>True</code>に設定します:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>batch_sentences = [
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;But what about second breakfast?&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;Don&#x27;t think he knows about second breakfast, Pip.&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;What about elevensies?&quot;</span>,
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_input = tokenizer(batch_sentences, padding=<span class="hljs-literal">True</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(encoded_input)
{<span class="hljs-string">&#x27;input_ids&#x27;</span>: [[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]],
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: [[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]],
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: [[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1k64bml">1番目と3番目の文は、短いために<code>0</code>でパディングされています。</p> <h3 class="relative group"><a id="truncation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#truncation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Truncation</span></h3> <p data-svelte-h="svelte-1qgo06i">逆のスペクトルでは、時折、モデルが処理するのに長すぎるシーケンスがあるかもしれません。この場合、シーケンスを短縮する必要があります。</p> <p data-svelte-h="svelte-1v3snjn">モデルが受け入れる最大の長さにシーケンスを切り詰めるには、<code>truncation</code>パラメータを<code>True</code>に設定します:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>batch_sentences = [
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;But what about second breakfast?&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;Don&#x27;t think he knows about second breakfast, Pip.&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;What about elevensies?&quot;</span>,
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_input = tokenizer(batch_sentences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(encoded_input)
{<span class="hljs-string">&#x27;input_ids&#x27;</span>: [[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]],
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: [[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]],
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: [[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]}<!-- HTML_TAG_END --></pre></div> <blockquote class="tip"><p data-svelte-h="svelte-1x8vear">異なるパディングと切り詰めの引数について詳しくは、<a href="./pad_truncation">パディングと切り詰め</a>のコンセプトガイドをご覧ください。</p></blockquote> <h3 class="relative group"><a id="build-tensors" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#build-tensors"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Build tensors</span></h3> <p data-svelte-h="svelte-4bohlh">最後に、トークナイザがモデルに供給される実際のテンソルを返すように設定します。</p> <p data-svelte-h="svelte-ggb8ta"><code>return_tensors</code>パラメータを<code>pt</code>(PyTorch用)または<code>tf</code>(TensorFlow用)に設定します:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>batch_sentences = [
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;But what about second breakfast?&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;Don&#x27;t think he knows about second breakfast, Pip.&quot;</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;What about elevensies?&quot;</span>,
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_input = tokenizer(batch_sentences, padding=<span class="hljs-literal">True</span>, truncation=<span class="hljs-literal">True</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(encoded_input)
{<span class="hljs-string">&#x27;input_ids&#x27;</span>: tensor([[<span class="hljs-number">101</span>, <span class="hljs-number">1252</span>, <span class="hljs-number">1184</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1790</span>, <span class="hljs-number">112</span>, <span class="hljs-number">189</span>, <span class="hljs-number">1341</span>, <span class="hljs-number">1119</span>, <span class="hljs-number">3520</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">1248</span>, <span class="hljs-number">6462</span>, <span class="hljs-number">117</span>, <span class="hljs-number">21902</span>, <span class="hljs-number">1643</span>, <span class="hljs-number">119</span>, <span class="hljs-number">102</span>],
[<span class="hljs-number">101</span>, <span class="hljs-number">1327</span>, <span class="hljs-number">1164</span>, <span class="hljs-number">5450</span>, <span class="hljs-number">23434</span>, <span class="hljs-number">136</span>, <span class="hljs-number">102</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]),
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: tensor([[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]]),
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]])}<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="audio" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#audio"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Audio</span></h2> <p data-svelte-h="svelte-5a6i84">オーディオタスクの場合、データセットをモデル用に準備するために<a href="main_classes/feature_extractor">特徴抽出器</a>が必要です。
特徴抽出器は生のオーディオデータから特徴を抽出し、それらをテンソルに変換するために設計されています。</p> <p data-svelte-h="svelte-x9qiqx"><a href="https://huggingface.co/datasets/PolyAI/minds14" rel="nofollow">PolyAI/minds14</a>データセットをロードして(データセットのロード方法の詳細については🤗 <a href="https://huggingface.co/docs/datasets/load_hub" rel="nofollow">Datasetsチュートリアル</a>を参照)、
オーディオデータセットで特徴抽出器をどのように使用できるかを確認してみましょう:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset, Audio
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;PolyAI/minds14&quot;</span>, name=<span class="hljs-string">&quot;en-US&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-181smcy">アクセスして<code>audio</code>列の最初の要素を確認します。<code>audio</code>列を呼び出すと、自動的にオーディオファイルが読み込まれ、リサンプリングされます:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;audio&quot;</span>]
{<span class="hljs-string">&#x27;array&#x27;</span>: array([ <span class="hljs-number">0.</span> , <span class="hljs-number">0.00024414</span>, -<span class="hljs-number">0.00024414</span>, ..., -<span class="hljs-number">0.00024414</span>,
<span class="hljs-number">0.</span> , <span class="hljs-number">0.</span> ], dtype=float32),
<span class="hljs-string">&#x27;path&#x27;</span>: <span class="hljs-string">&#x27;/root/.cache/huggingface/datasets/downloads/extracted/f14948e0e84be638dd7943ac36518a4cf3324e8b7aa331c5ab11541518e9368c/en-US~JOINT_ACCOUNT/602ba55abb1e6d0fbce92065.wav&#x27;</span>,
<span class="hljs-string">&#x27;sampling_rate&#x27;</span>: <span class="hljs-number">8000</span>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-kid8gi">これにより、3つのアイテムが返されます:</p> <ul data-svelte-h="svelte-aqqnhp"><li><code>array</code> は読み込まれた音声信号で、1Dの配列として読み込まれます。必要に応じてリサンプリングされることもあります。</li> <li><code>path</code> は音声ファイルの場所を指します。</li> <li><code>sampling_rate</code> は音声信号内のデータポイントが1秒間にいくつ測定されるかを示します。</li></ul> <p data-svelte-h="svelte-1qdns94">このチュートリアルでは、<a href="https://huggingface.co/facebook/wav2vec2-base" rel="nofollow">Wav2Vec2</a>モデルを使用します。
モデルカードを確認すると、Wav2Vec2が16kHzのサンプリングされた音声オーディオで事前学習されていることがわかります。
モデルの事前学習に使用されたデータセットのサンプリングレートと、あなたのオーディオデータのサンプリングレートが一致することが重要です。
データのサンプリングレートが異なる場合、データをリサンプリングする必要があります。</p> <ol data-svelte-h="svelte-177g3h8"><li>🤗 Datasetsの <code>cast_column</code> メソッドを使用して、サンプリングレートを16kHzにアップサンプリングします:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>dataset = dataset.cast_column(<span class="hljs-string">&quot;audio&quot;</span>, Audio(sampling_rate=<span class="hljs-number">16_000</span>))<!-- HTML_TAG_END --></pre></div> <ol start="2" data-svelte-h="svelte-16dulyv"><li>再び <code>audio</code> 列を呼び出してオーディオファイルをリサンプルします:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;audio&quot;</span>]
{<span class="hljs-string">&#x27;array&#x27;</span>: array([ <span class="hljs-number">2.3443763e-05</span>, <span class="hljs-number">2.1729663e-04</span>, <span class="hljs-number">2.2145823e-04</span>, ...,
<span class="hljs-number">3.8356509e-05</span>, -<span class="hljs-number">7.3497440e-06</span>, -<span class="hljs-number">2.1754686e-05</span>], dtype=float32),
<span class="hljs-string">&#x27;path&#x27;</span>: <span class="hljs-string">&#x27;/root/.cache/huggingface/datasets/downloads/extracted/f14948e0e84be638dd7943ac36518a4cf3324e8b7aa331c5ab11541518e9368c/en-US~JOINT_ACCOUNT/602ba55abb1e6d0fbce92065.wav&#x27;</span>,
<span class="hljs-string">&#x27;sampling_rate&#x27;</span>: <span class="hljs-number">16000</span>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1106258">次に、入力を正規化しパディングするために特徴抽出器をロードします。テキストデータをパディングする場合、短いシーケンスには <code>0</code> が追加されます。同じ考え方がオーディオデータにも適用されます。特徴抽出器は <code>array</code><code>0</code> を追加します(これは無音として解釈されます)。</p> <p data-svelte-h="svelte-1w7kgtt"><a href="/docs/transformers/main/ja/model_doc/auto#transformers.AutoFeatureExtractor.from_pretrained">AutoFeatureExtractor.from_pretrained()</a>を使用して特徴抽出器をロードします:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoFeatureExtractor
<span class="hljs-meta">&gt;&gt;&gt; </span>feature_extractor = AutoFeatureExtractor.from_pretrained(<span class="hljs-string">&quot;facebook/wav2vec2-base&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-3botgm">オーディオ <code>array</code> を特徴抽出器に渡します。特徴抽出器で発生する可能性のある無音エラーをより良くデバッグするために、特徴抽出器に <code>sampling_rate</code> 引数を追加することをお勧めします。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>audio_input = [dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;audio&quot;</span>][<span class="hljs-string">&quot;array&quot;</span>]]
<span class="hljs-meta">&gt;&gt;&gt; </span>feature_extractor(audio_input, sampling_rate=<span class="hljs-number">16000</span>)
{<span class="hljs-string">&#x27;input_values&#x27;</span>: [array([ <span class="hljs-number">3.8106556e-04</span>, <span class="hljs-number">2.7506407e-03</span>, <span class="hljs-number">2.8015103e-03</span>, ...,
<span class="hljs-number">5.6335266e-04</span>, <span class="hljs-number">4.6588284e-06</span>, -<span class="hljs-number">1.7142107e-04</span>], dtype=float32)]}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-e60mhe">同様に、トークナイザと同様に、バッチ内の可変シーケンスを処理するためにパディングまたは切り詰めを適用できます。次に、これらの2つのオーディオサンプルのシーケンス長を確認してみましょう:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;audio&quot;</span>][<span class="hljs-string">&quot;array&quot;</span>].shape
(<span class="hljs-number">173398</span>,)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">1</span>][<span class="hljs-string">&quot;audio&quot;</span>][<span class="hljs-string">&quot;array&quot;</span>].shape
(<span class="hljs-number">106496</span>,)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1behdst">この関数は、データセットを前処理してオーディオサンプルの長さを同じにするためのものです。最大サンプル長を指定し、特徴抽出器はシーケンスをそれに合わせてパディングまたは切り詰めます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_function</span>(<span class="hljs-params">examples</span>):
<span class="hljs-meta">... </span> audio_arrays = [x[<span class="hljs-string">&quot;array&quot;</span>] <span class="hljs-keyword">for</span> x <span class="hljs-keyword">in</span> examples[<span class="hljs-string">&quot;audio&quot;</span>]]
<span class="hljs-meta">... </span> inputs = feature_extractor(
<span class="hljs-meta">... </span> audio_arrays,
<span class="hljs-meta">... </span> sampling_rate=<span class="hljs-number">16000</span>,
<span class="hljs-meta">... </span> padding=<span class="hljs-literal">True</span>,
<span class="hljs-meta">... </span> max_length=<span class="hljs-number">100000</span>,
<span class="hljs-meta">... </span> truncation=<span class="hljs-literal">True</span>,
<span class="hljs-meta">... </span> )
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> inputs<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-11qgqhs"><code>preprocess_function</code>をデータセットの最初の数例に適用します:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>processed_dataset = preprocess_function(dataset[:<span class="hljs-number">5</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-uei1o">サンプルの長さは現在同じで、指定された最大長と一致しています。これで処理されたデータセットをモデルに渡すことができます!</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>processed_dataset[<span class="hljs-string">&quot;input_values&quot;</span>][<span class="hljs-number">0</span>].shape
(<span class="hljs-number">100000</span>,)
<span class="hljs-meta">&gt;&gt;&gt; </span>processed_dataset[<span class="hljs-string">&quot;input_values&quot;</span>][<span class="hljs-number">1</span>].shape
(<span class="hljs-number">100000</span>,)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="computer-vision" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#computer-vision"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Computer Vision</span></h2> <p data-svelte-h="svelte-xt5wwu">コンピュータビジョンタスクでは、モデル用にデータセットを準備するための<a href="main_classes/image_processor">画像プロセッサ</a>が必要です。
画像の前処理には、画像をモデルが期待する入力形式に変換するためのいくつかのステップが含まれています。これらのステップには、リサイズ、正規化、カラーチャネルの補正、および画像をテンソルに変換するなどが含まれます。</p> <blockquote class="tip"><p data-svelte-h="svelte-5jimup">画像の前処理は、通常、画像の増強の形式に従います。画像の前処理と画像の増強の両方は画像データを変換しますが、異なる目的があります:</p> <ul data-svelte-h="svelte-x2xmel"><li>画像の増強は、過学習を防ぎ、モデルの堅牢性を向上させるのに役立つ方法で画像を変更します。データを増強する方法は無限で、明るさや色の調整、クロップ、回転、リサイズ、ズームなど、様々な方法があります。ただし、増強操作によって画像の意味が変わらないように注意する必要があります。</li> <li>画像の前処理は、画像がモデルの期待する入力形式と一致することを保証します。コンピュータビジョンモデルをファインチューニングする場合、画像はモデルが最初にトレーニングされたときとまったく同じ方法で前処理する必要があります。</li></ul> <p data-svelte-h="svelte-1r84kp2">画像の増強には任意のライブラリを使用できます。画像の前処理には、モデルに関連付けられた<code>ImageProcessor</code>を使用します。</p></blockquote> <p data-svelte-h="svelte-1enbnqj">コンピュータビジョンのデータセットで画像プロセッサを使用する方法を示すために、<a href="https://huggingface.co/datasets/ethz/food101" rel="nofollow">food101</a>データセットをロードします(データセットのロード方法の詳細については🤗<a href="https://huggingface.co/docs/datasets/load_hub" rel="nofollow">Datasetsチュートリアル</a>を参照):</p> <blockquote class="tip"><p data-svelte-h="svelte-i06ozh">データセットがかなり大きいため、🤗 Datasetsの<code>split</code>パラメータを使用してトレーニングデータの小さなサンプルのみをロードします!</p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;ethz/food101&quot;</span>, split=<span class="hljs-string">&quot;train[:100]&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1df0fno">次に、🤗 Datasetsの <a href="https://huggingface.co/docs/datasets/package_reference/main_classes?highlight=image#datasets.Image" rel="nofollow"><code>Image</code></a> 機能で画像を見てみましょう:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;image&quot;</span>]<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1jzdron"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/vision-preprocess-tutorial.png"></div> <p data-svelte-h="svelte-eigu37">AutoImageProcessorを<a href="/docs/transformers/main/ja/model_doc/auto#transformers.AutoImageProcessor.from_pretrained">AutoImageProcessor.from_pretrained()</a>を使用してロードします:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoImageProcessor
<span class="hljs-meta">&gt;&gt;&gt; </span>image_processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">&quot;google/vit-base-patch16-224&quot;</span>)<!-- HTML_TAG_END --></pre></div> <ol data-svelte-h="svelte-380xnj"><li><p>まず、画像の拡張を追加しましょう。好きなライブラリを使用できますが、このチュートリアルではtorchvisionの<a href="https://pytorch.org/vision/stable/transforms.html" rel="nofollow"><code>transforms</code></a>モジュールを使用します。別のデータ拡張ライブラリを使用したい場合は、<a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb" rel="nofollow">Albumentations</a>または<a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb" rel="nofollow">Kornia notebooks</a>で詳細を学ぶことができます。</p> <p>ここでは、<a href="https://pytorch.org/vision/master/generated/torchvision.transforms.Compose.html" rel="nofollow"><code>Compose</code></a>を使用していくつかの変換を連鎖させます - <a href="https://pytorch.org/vision/main/generated/torchvision.transforms.RandomResizedCrop.html" rel="nofollow"><code>RandomResizedCrop</code></a><a href="https://pytorch.org/vision/main/generated/torchvision.transforms.ColorJitter.html" rel="nofollow"><code>ColorJitter</code></a>
サイズの変更に関しては、<code>image_processor</code>から画像サイズの要件を取得できます。
一部のモデルでは、正確な高さと幅が必要ですが、他のモデルでは<code>shortest_edge</code>のみが定義されています。</p></li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> torchvision.transforms <span class="hljs-keyword">import</span> RandomResizedCrop, ColorJitter, Compose
<span class="hljs-meta">&gt;&gt;&gt; </span>size = (
<span class="hljs-meta">... </span> image_processor.size[<span class="hljs-string">&quot;shortest_edge&quot;</span>]
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> <span class="hljs-string">&quot;shortest_edge&quot;</span> <span class="hljs-keyword">in</span> image_processor.size
<span class="hljs-meta">... </span> <span class="hljs-keyword">else</span> (image_processor.size[<span class="hljs-string">&quot;height&quot;</span>], image_processor.size[<span class="hljs-string">&quot;width&quot;</span>])
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>_transforms = Compose([RandomResizedCrop(size), ColorJitter(brightness=<span class="hljs-number">0.5</span>, hue=<span class="hljs-number">0.5</span>)])<!-- HTML_TAG_END --></pre></div> <ol start="2" data-svelte-h="svelte-154pokx"><li>モデルは<a href="model_doc/visionencoderdecoder#transformers.VisionEncoderDecoderModel.forward.pixel_values"><code>pixel_values</code></a>を入力として受け取ります。
<code>ImageProcessor</code>は画像の正規化と適切なテンソルの生成を処理できます。
一連の画像に対する画像拡張と画像前処理を組み合わせ、<code>pixel_values</code>を生成する関数を作成します:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">transforms</span>(<span class="hljs-params">examples</span>):
<span class="hljs-meta">... </span> images = [_transforms(img.convert(<span class="hljs-string">&quot;RGB&quot;</span>)) <span class="hljs-keyword">for</span> img <span class="hljs-keyword">in</span> examples[<span class="hljs-string">&quot;image&quot;</span>]]
<span class="hljs-meta">... </span> examples[<span class="hljs-string">&quot;pixel_values&quot;</span>] = image_processor(images, do_resize=<span class="hljs-literal">False</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)[<span class="hljs-string">&quot;pixel_values&quot;</span>]
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> examples<!-- HTML_TAG_END --></pre></div> <blockquote class="tip"><p data-svelte-h="svelte-twhr4h">上記の例では、画像のサイズ変更を既に画像増強変換で行っているため、<code>do_resize=False</code>を設定しました。
適切な <code>image_processor</code> からの <code>size</code> 属性を活用しています。画像増強中に画像のサイズ変更を行わない場合は、このパラメータを省略してください。
デフォルトでは、<code>ImageProcessor</code> がサイズ変更を処理します。</p> <p data-svelte-h="svelte-1ir8nuv">画像を増強変換の一部として正規化したい場合は、<code>image_processor.image_mean</code><code>image_processor.image_std</code> の値を使用してください。</p></blockquote> <ol start="3" data-svelte-h="svelte-1hfzjfw"><li>次に、🤗 Datasetsの<a href="https://huggingface.co/docs/datasets/process#format-transform" rel="nofollow"><code>set_transform</code></a>を使用して、変換をリアルタイムで適用します:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>dataset.set_transform(transforms)<!-- HTML_TAG_END --></pre></div> <ol start="4" data-svelte-h="svelte-11u24y6"><li>画像にアクセスすると、画像プロセッサが <code>pixel_values</code> を追加したことがわかります。これで処理済みのデータセットをモデルに渡すことができます!</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>].keys()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1pzyp8l">以下は、変換が適用された後の画像の外観です。 画像はランダムに切り抜かれ、その色の特性も異なります。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> matplotlib.pyplot <span class="hljs-keyword">as</span> plt
<span class="hljs-meta">&gt;&gt;&gt; </span>img = dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;pixel_values&quot;</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>plt.imshow(img.permute(<span class="hljs-number">1</span>, <span class="hljs-number">2</span>, <span class="hljs-number">0</span>))<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-mp8qzc"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/preprocessed_image.png"></div> <blockquote class="tip"><p data-svelte-h="svelte-149ytgm">オブジェクト検出、意味セグメンテーション、インスタンスセグメンテーション、およびパノプティックセグメンテーションなどのタスクの場合、<code>ImageProcessor</code>
ポスト処理メソッドを提供します。これらのメソッドは、モデルの生の出力を境界ボックスやセグメンテーションマップなどの意味のある予測に変換します。</p></blockquote> <h3 class="relative group"><a id="pad" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pad"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Pad</span></h3> <p data-svelte-h="svelte-c97zbl">一部の場合、たとえば、<a href="./model_doc/detr">DETR</a>をファインチューニングする場合、モデルはトレーニング時にスケールの変更を適用します。
これにより、バッチ内の画像のサイズが異なる場合があります。<a href="/docs/transformers/main/ja/model_doc/detr#transformers.DetrImageProcessor">DetrImageProcessor</a>から<code>DetrImageProcessor.pad()</code>を使用し、
カスタムの<code>collate_fn</code>を定義して画像を一緒にバッチ処理できます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">collate_fn</span>(<span class="hljs-params">batch</span>):
<span class="hljs-meta">... </span> pixel_values = [item[<span class="hljs-string">&quot;pixel_values&quot;</span>] <span class="hljs-keyword">for</span> item <span class="hljs-keyword">in</span> batch]
<span class="hljs-meta">... </span> encoding = image_processor.pad(pixel_values, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-meta">... </span> labels = [item[<span class="hljs-string">&quot;labels&quot;</span>] <span class="hljs-keyword">for</span> item <span class="hljs-keyword">in</span> batch]
<span class="hljs-meta">... </span> batch = {}
<span class="hljs-meta">... </span> batch[<span class="hljs-string">&quot;pixel_values&quot;</span>] = encoding[<span class="hljs-string">&quot;pixel_values&quot;</span>]
<span class="hljs-meta">... </span> batch[<span class="hljs-string">&quot;pixel_mask&quot;</span>] = encoding[<span class="hljs-string">&quot;pixel_mask&quot;</span>]
<span class="hljs-meta">... </span> batch[<span class="hljs-string">&quot;labels&quot;</span>] = labels
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> batch<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="multi-modal" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#multi-modal"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Multi Modal</span></h2> <p data-svelte-h="svelte-s8srfc">マルチモーダル入力を使用するタスクの場合、モデル用にデータセットを準備するための<a href="main_classes/processors">プロセッサ</a>が必要です。プロセッサは、トークナイザや特徴量抽出器などの2つの処理オブジェクトを結合します。</p> <p data-svelte-h="svelte-1fiplfe">自動音声認識(ASR)のためのプロセッサの使用方法を示すために、<a href="https://huggingface.co/datasets/lj_speech" rel="nofollow">LJ Speech</a>データセットをロードします(データセットのロード方法の詳細については🤗 <a href="https://huggingface.co/docs/datasets/load_hub" rel="nofollow">Datasets チュートリアル</a>を参照):</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>lj_speech = load_dataset(<span class="hljs-string">&quot;lj_speech&quot;</span>, split=<span class="hljs-string">&quot;train&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-168bdy5">ASR(自動音声認識)の場合、主に <code>audio</code><code>text</code> に焦点を当てているため、他の列を削除できます:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>lj_speech = lj_speech.<span class="hljs-built_in">map</span>(remove_columns=[<span class="hljs-string">&quot;file&quot;</span>, <span class="hljs-string">&quot;id&quot;</span>, <span class="hljs-string">&quot;normalized_text&quot;</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1m3tjs9">次に、<code>audio</code><code>text</code>の列を見てみましょう:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-python "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>lj_speech[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;audio&quot;</span>]
{<span class="hljs-string">&#x27;array&#x27;</span>: array([-<span class="hljs-number">7.3242188e-04</span>, -<span class="hljs-number">7.6293945e-04</span>, -<span class="hljs-number">6.4086914e-04</span>, ...,
<span class="hljs-number">7.3242188e-04</span>, <span class="hljs-number">2.1362305e-04</span>, <span class="hljs-number">6.1035156e-05</span>], dtype=float32),
<span class="hljs-string">&#x27;path&#x27;</span>: <span class="hljs-string">&#x27;/root/.cache/huggingface/datasets/downloads/extracted/917ece08c95cf0c4115e45294e3cd0dee724a1165b7fc11798369308a465bd26/LJSpeech-1.1/wavs/LJ001-0001.wav&#x27;</span>,
<span class="hljs-string">&#x27;sampling_rate&#x27;</span>: <span class="hljs-number">22050</span>}
<span class="hljs-meta">&gt;&gt;&gt; </span>lj_speech[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;text&quot;</span>]
<span class="hljs-string">&#x27;Printing, in the only sense with which we are at present concerned, differs from most if not from all the arts and crafts represented in the Exhibition&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1k9tvup">常に、オーディオデータセットのサンプリングレートを、モデルの事前学習に使用されたデータセットのサンプリングレートと一致させるように<a href="preprocessing#audio">リサンプル</a>する必要があります!</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>lj_speech = lj_speech.cast_column(<span class="hljs-string">&quot;audio&quot;</span>, Audio(sampling_rate=<span class="hljs-number">16_000</span>))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-10y9lgc">プロセッサを <a href="/docs/transformers/main/ja/model_doc/auto#transformers.AutoProcessor.from_pretrained">AutoProcessor.from_pretrained()</a> を使用してロードします:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoProcessor
<span class="hljs-meta">&gt;&gt;&gt; </span>processor = AutoProcessor.from_pretrained(<span class="hljs-string">&quot;facebook/wav2vec2-base-960h&quot;</span>)<!-- HTML_TAG_END --></pre></div> <ol data-svelte-h="svelte-1e9n8c9"><li><code>array</code>内に含まれるオーディオデータを<code>input_values</code>に処理し、<code>text</code><code>labels</code>にトークン化する関数を作成します:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">prepare_dataset</span>(<span class="hljs-params">example</span>):
<span class="hljs-meta">... </span> audio = example[<span class="hljs-string">&quot;audio&quot;</span>]
<span class="hljs-meta">... </span> example.update(processor(audio=audio[<span class="hljs-string">&quot;array&quot;</span>], text=example[<span class="hljs-string">&quot;text&quot;</span>], sampling_rate=<span class="hljs-number">16000</span>))
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> example<!-- HTML_TAG_END --></pre></div> <ol start="2" data-svelte-h="svelte-l74913"><li>サンプルに<code>prepare_dataset</code>関数を適用します:</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>prepare_dataset(lj_speech[<span class="hljs-number">0</span>])<!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ja/preprocessing.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_1brk4am = {
assets: "/docs/transformers/main/ja",
base: "/docs/transformers/main/ja",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/transformers/main/ja/_app/immutable/entry/start.18e2cf21.js"),
import("/docs/transformers/main/ja/_app/immutable/entry/app.262d8b88.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 122],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
121 kB
·
Xet hash:
8ecfb189a27c9b7224e1cba86049c554eab4fdf4069e9deca9307d9698adcc00

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.