Buckets:

hf-doc-build/doc-dev / transformers /main /ja /tasks /object_detection.html
rtrm's picture
download
raw
99.2 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Object detection&quot;,&quot;local&quot;:&quot;object-detection&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Load the CPPE-5 dataset&quot;,&quot;local&quot;:&quot;load-the-cppe-5-dataset&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Preprocess the data&quot;,&quot;local&quot;:&quot;preprocess-the-data&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Training the DETR model&quot;,&quot;local&quot;:&quot;training-the-detr-model&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Evaluate&quot;,&quot;local&quot;:&quot;evaluate&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inference&quot;,&quot;local&quot;:&quot;inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/main/ja/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/entry/start.1486e459.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/scheduler.9bc65507.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/singletons.eee55cbf.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/index.3b203c72.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/paths.59da1547.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/entry/app.d9ae818f.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/index.707bf1b6.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/nodes/0.c06aa070.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/nodes/147.a854e3cf.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/Tip.c2ecdbf4.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/CodeBlock.54a9f38d.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/DocNotebookDropdown.41f65cb5.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/globals.7f7f1b26.js">
<link rel="modulepreload" href="/docs/transformers/main/ja/_app/immutable/chunks/EditOnGithub.922df6ba.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Object detection&quot;,&quot;local&quot;:&quot;object-detection&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Load the CPPE-5 dataset&quot;,&quot;local&quot;:&quot;load-the-cppe-5-dataset&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Preprocess the data&quot;,&quot;local&quot;:&quot;preprocess-the-data&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Training the DETR model&quot;,&quot;local&quot;:&quot;training-the-detr-model&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Evaluate&quot;,&quot;local&quot;:&quot;evaluate&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Inference&quot;,&quot;local&quot;:&quot;inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="object-detection" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#object-detection"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Object detection</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <p data-svelte-h="svelte-5r6akf">オブジェクト検出は、画像内のインスタンス (人間、建物、車など) を検出するコンピューター ビジョン タスクです。物体検出モデルは画像を入力および出力として受け取ります
検出されたオブジェクトの境界ボックスと関連するラベルの座標。画像には複数のオブジェクトを含めることができます。
それぞれに独自の境界ボックスとラベルがあり (例: 車と建物を持つことができます)、各オブジェクトは
画像のさまざまな部分に存在する必要があります (たとえば、画像には複数の車が含まれている可能性があります)。
このタスクは、歩行者、道路標識、信号機などを検出するために自動運転で一般的に使用されます。
他のアプリケーションには、画像内のオブジェクトのカウント、画像検索などが含まれます。</p> <p data-svelte-h="svelte-sw406c">このガイドでは、次の方法を学習します。</p> <ol data-svelte-h="svelte-gfibwd"><li>Finetune <a href="https://huggingface.co/docs/transformers/model_doc/detr" rel="nofollow">DETR</a>、畳み込みアルゴリズムを組み合わせたモデル
<a href="https://huggingface.co/datasets/cppe-5" rel="nofollow">CPPE-5</a> 上のエンコーダー/デコーダー トランスフォーマーを備えたバックボーン
データセット。</li> <li>微調整したモデルを推論に使用します。</li></ol> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-119id0v">このタスクと互換性のあるすべてのアーキテクチャとチェックポイントを確認するには、<a href="https://huggingface.co/tasks/object-detection" rel="nofollow">タスクページ</a> を確認することをお勧めします。</p></div> <p data-svelte-h="svelte-1lya3k8">始める前に、必要なライブラリがすべてインストールされていることを確認してください。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install -q datasets transformers evaluate timm albumentations<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1heb5sb">🤗 データセットを使用して Hugging Face Hub からデータセットをロードし、🤗 トランスフォーマーを使用してモデルをトレーニングします。
データを増強するための<code>albumentations</code><code>timm</code> は現在、DETR モデルの畳み込みバックボーンをロードするために必要です。</p> <p data-svelte-h="svelte-km1v2k">モデルをコミュニティと共有することをお勧めします。 Hugging Face アカウントにログインして、ハブにアップロードします。
プロンプトが表示されたら、トークンを入力してログインします。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login
<span class="hljs-meta">&gt;&gt;&gt; </span>notebook_login()<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="load-the-cppe-5-dataset" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#load-the-cppe-5-dataset"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Load the CPPE-5 dataset</span></h2> <p data-svelte-h="svelte-omjhw2"><a href="https://huggingface.co/datasets/cppe-5" rel="nofollow">CPPE-5 データセット</a> には、次の画像が含まれています。
新型コロナウイルス感染症のパンデミックにおける医療用個人保護具 (PPE) を識別する注釈。</p> <p data-svelte-h="svelte-1kjcyvo">データセットをロードすることから始めます。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>cppe5 = load_dataset(<span class="hljs-string">&quot;cppe-5&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>cppe5
DatasetDict({
train: Dataset({
features: [<span class="hljs-string">&#x27;image_id&#x27;</span>, <span class="hljs-string">&#x27;image&#x27;</span>, <span class="hljs-string">&#x27;width&#x27;</span>, <span class="hljs-string">&#x27;height&#x27;</span>, <span class="hljs-string">&#x27;objects&#x27;</span>],
num_rows: <span class="hljs-number">1000</span>
})
test: Dataset({
features: [<span class="hljs-string">&#x27;image_id&#x27;</span>, <span class="hljs-string">&#x27;image&#x27;</span>, <span class="hljs-string">&#x27;width&#x27;</span>, <span class="hljs-string">&#x27;height&#x27;</span>, <span class="hljs-string">&#x27;objects&#x27;</span>],
num_rows: <span class="hljs-number">29</span>
})
})<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1edqlqi">このデータセットには、1000 枚の画像を含むトレーニング セットと 29 枚の画像を含むテスト セットがすでに付属していることがわかります。</p> <p data-svelte-h="svelte-h4ffk6">データに慣れるために、例がどのようなものかを調べてください。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>cppe5[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-number">0</span>]
{<span class="hljs-string">&#x27;image_id&#x27;</span>: <span class="hljs-number">15</span>,
<span class="hljs-string">&#x27;image&#x27;</span>: &lt;PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=943x663 at <span class="hljs-number">0x7F9EC9E77C10</span>&gt;,
<span class="hljs-string">&#x27;width&#x27;</span>: <span class="hljs-number">943</span>,
<span class="hljs-string">&#x27;height&#x27;</span>: <span class="hljs-number">663</span>,
<span class="hljs-string">&#x27;objects&#x27;</span>: {<span class="hljs-string">&#x27;id&#x27;</span>: [<span class="hljs-number">114</span>, <span class="hljs-number">115</span>, <span class="hljs-number">116</span>, <span class="hljs-number">117</span>],
<span class="hljs-string">&#x27;area&#x27;</span>: [<span class="hljs-number">3796</span>, <span class="hljs-number">1596</span>, <span class="hljs-number">152768</span>, <span class="hljs-number">81002</span>],
<span class="hljs-string">&#x27;bbox&#x27;</span>: [[<span class="hljs-number">302.0</span>, <span class="hljs-number">109.0</span>, <span class="hljs-number">73.0</span>, <span class="hljs-number">52.0</span>],
[<span class="hljs-number">810.0</span>, <span class="hljs-number">100.0</span>, <span class="hljs-number">57.0</span>, <span class="hljs-number">28.0</span>],
[<span class="hljs-number">160.0</span>, <span class="hljs-number">31.0</span>, <span class="hljs-number">248.0</span>, <span class="hljs-number">616.0</span>],
[<span class="hljs-number">741.0</span>, <span class="hljs-number">68.0</span>, <span class="hljs-number">202.0</span>, <span class="hljs-number">401.0</span>]],
<span class="hljs-string">&#x27;category&#x27;</span>: [<span class="hljs-number">4</span>, <span class="hljs-number">4</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>]}}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13hht2o">データセット内の例には次のフィールドがあります。</p> <ul data-svelte-h="svelte-fjri37"><li><code>image_id</code>: サンプルの画像ID</li> <li><code>image</code>: 画像を含む <code>PIL.Image.Image</code> オブジェクト</li> <li><code>width</code>: 画像の幅</li> <li><code>height</code>: 画像の高さ</li> <li><code>objects</code>: 画像内のオブジェクトの境界ボックスのメタデータを含む辞書:<ul><li><code>id</code>: アノテーションID</li> <li><code>area</code>: 境界ボックスの領域</li> <li><code>bbox</code>: オブジェクトの境界ボックス (<a href="https://albumentations.ai/docs/getting_started/bounding_boxes_augmentation/#coco" rel="nofollow">COCO 形式</a> )</li> <li><code>category</code>: オブジェクトのカテゴリー。可能な値には、<code>Coverall (0)</code><code>Face_Shield (1)</code><code>Gloves (2)</code><code>Goggles (3)</code>、および <code>Mask (4)</code> が含まれます。</li></ul></li></ul> <p data-svelte-h="svelte-y6a2g5"><code>bbox</code>フィールドが COCO 形式に従っていることに気づくかもしれません。これは DETR モデルが予期する形式です。
ただし、「オブジェクト」内のフィールドのグループ化は、DETR が必要とする注釈形式とは異なります。あなたはするであろう
このデータをトレーニングに使用する前に、いくつかの前処理変換を適用する必要があります。</p> <p data-svelte-h="svelte-1q29g0x">データをさらに深く理解するには、データセット内の例を視覚化します。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> os
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image, ImageDraw
<span class="hljs-meta">&gt;&gt;&gt; </span>image = cppe5[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-number">0</span>][<span class="hljs-string">&quot;image&quot;</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>annotations = cppe5[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-number">0</span>][<span class="hljs-string">&quot;objects&quot;</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>draw = ImageDraw.Draw(image)
<span class="hljs-meta">&gt;&gt;&gt; </span>categories = cppe5[<span class="hljs-string">&quot;train&quot;</span>].features[<span class="hljs-string">&quot;objects&quot;</span>].feature[<span class="hljs-string">&quot;category&quot;</span>].names
<span class="hljs-meta">&gt;&gt;&gt; </span>id2label = {index: x <span class="hljs-keyword">for</span> index, x <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(categories, start=<span class="hljs-number">0</span>)}
<span class="hljs-meta">&gt;&gt;&gt; </span>label2id = {v: k <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> id2label.items()}
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-built_in">len</span>(annotations[<span class="hljs-string">&quot;id&quot;</span>])):
<span class="hljs-meta">... </span> box = annotations[<span class="hljs-string">&quot;bbox&quot;</span>][i]
<span class="hljs-meta">... </span> class_idx = annotations[<span class="hljs-string">&quot;category&quot;</span>][i]
<span class="hljs-meta">... </span> x, y, w, h = <span class="hljs-built_in">tuple</span>(box)
<span class="hljs-meta">... </span> draw.rectangle((x, y, x + w, y + h), outline=<span class="hljs-string">&quot;red&quot;</span>, width=<span class="hljs-number">1</span>)
<span class="hljs-meta">... </span> draw.text((x, y), id2label[class_idx], fill=<span class="hljs-string">&quot;white&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1mkaz8h"><img src="https://i.imgur.com/TdaqPJO.png" alt="CPPE-5 Image Example"></div> <p data-svelte-h="svelte-19dg06i">関連付けられたラベルを使用して境界ボックスを視覚化するには、データセットのメタデータからラベルを取得します。
<code>category</code>フィールド。
また、ラベル ID をラベル クラスにマッピングする辞書 (<code>id2label</code>) やその逆 (<code>label2id</code>) を作成することもできます。
これらは、後でモデルをセットアップするときに使用できます。これらのマップを含めると、共有した場合に他の人がモデルを再利用できるようになります。
ハグフェイスハブに取り付けます。</p> <p data-svelte-h="svelte-1xdk7br">データに慣れるための最後のステップとして、潜在的な問題がないかデータを調査します。データセットに関する一般的な問題の 1 つは、
オブジェクト検出は、画像の端を越えて「伸びる」境界ボックスです。このような「暴走」境界ボックスは、
トレーニング中にエラーが発生するため、この段階で対処する必要があります。このデータセットには、この問題に関する例がいくつかあります。
このガイドでは内容をわかりやすくするために、これらの画像をデータから削除します。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>remove_idx = [<span class="hljs-number">590</span>, <span class="hljs-number">821</span>, <span class="hljs-number">822</span>, <span class="hljs-number">875</span>, <span class="hljs-number">876</span>, <span class="hljs-number">878</span>, <span class="hljs-number">879</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>keep = [i <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-built_in">len</span>(cppe5[<span class="hljs-string">&quot;train&quot;</span>])) <span class="hljs-keyword">if</span> i <span class="hljs-keyword">not</span> <span class="hljs-keyword">in</span> remove_idx]
<span class="hljs-meta">&gt;&gt;&gt; </span>cppe5[<span class="hljs-string">&quot;train&quot;</span>] = cppe5[<span class="hljs-string">&quot;train&quot;</span>].select(keep)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="preprocess-the-data" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#preprocess-the-data"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Preprocess the data</span></h2> <p data-svelte-h="svelte-76sjmi">モデルを微調整するには、事前トレーニングされたモデルに使用されるアプローチと正確に一致するように、使用する予定のデータを前処理する必要があります。
<a href="/docs/transformers/main/ja/model_doc/auto#transformers.AutoImageProcessor">AutoImageProcessor</a> は、画像データを処理して <code>pixel_values</code><code>pixel_mask</code>、および
DETR モデルをトレーニングできる「ラベル」。画像プロセッサには、心配する必要のないいくつかの属性があります。</p> <ul data-svelte-h="svelte-9xz2l6"><li><code>image_mean = [0.485, 0.456, 0.406 ]</code></li> <li><code>image_std = [0.229, 0.224, 0.225]</code></li></ul> <p data-svelte-h="svelte-w4h0v0">これらは、モデルの事前トレーニング中に画像を正規化するために使用される平均と標準偏差です。これらの価値観は非常に重要です
事前にトレーニングされた画像モデルを推論または微調整するときに複製します。</p> <p data-svelte-h="svelte-16cx11i">微調整するモデルと同じチェックポイントからイメージ プロセッサをインスタンス化します。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoImageProcessor
<span class="hljs-meta">&gt;&gt;&gt; </span>checkpoint = <span class="hljs-string">&quot;facebook/detr-resnet-50&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>image_processor = AutoImageProcessor.from_pretrained(checkpoint)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1i23rsc">画像を<code>image_processor</code>に渡す前に、2 つの前処理変換をデータセットに適用します。</p> <ul data-svelte-h="svelte-17t0a11"><li>画像の拡張</li> <li>DETR の期待に応えるための注釈の再フォーマット</li></ul> <p data-svelte-h="svelte-18jci73">まず、モデルがトレーニング データにオーバーフィットしないようにするために、任意のデータ拡張ライブラリを使用して画像拡張を適用できます。ここでは<a href="https://albumentations.ai/docs/" rel="nofollow">Albumentations</a>を使用します…
このライブラリは、変換が画像に影響を与え、それに応じて境界ボックスを更新することを保証します。
🤗 データセット ライブラリのドキュメントには、詳細な <a href="https://huggingface.co/docs/datasets/object_detection" rel="nofollow">物体検出用に画像を拡張する方法に関するガイド</a> が記載されています。
例としてまったく同じデータセットを使用しています。ここでも同じアプローチを適用し、各画像のサイズを (480, 480) に変更します。
水平に反転して明るくします。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> albumentations
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span>transform = albumentations.Compose(
<span class="hljs-meta">... </span> [
<span class="hljs-meta">... </span> albumentations.Resize(<span class="hljs-number">480</span>, <span class="hljs-number">480</span>),
<span class="hljs-meta">... </span> albumentations.HorizontalFlip(p=<span class="hljs-number">1.0</span>),
<span class="hljs-meta">... </span> albumentations.RandomBrightnessContrast(p=<span class="hljs-number">1.0</span>),
<span class="hljs-meta">... </span> ],
<span class="hljs-meta">... </span> bbox_params=albumentations.BboxParams(<span class="hljs-built_in">format</span>=<span class="hljs-string">&quot;coco&quot;</span>, label_fields=[<span class="hljs-string">&quot;category&quot;</span>]),
<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-3sh3zj"><code>image_processor</code> は、注釈が次の形式であることを期待します: <code>{&#39;image_id&#39;: int, &#39;annotations&#39;: List[Dict]}</code>,
ここで、各辞書は COCO オブジェクトの注釈です。 1 つの例として、注釈を再フォーマットする関数を追加してみましょう。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">formatted_anns</span>(<span class="hljs-params">image_id, category, area, bbox</span>):
<span class="hljs-meta">... </span> annotations = []
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">0</span>, <span class="hljs-built_in">len</span>(category)):
<span class="hljs-meta">... </span> new_ann = {
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;image_id&quot;</span>: image_id,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;category_id&quot;</span>: category[i],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;isCrowd&quot;</span>: <span class="hljs-number">0</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;area&quot;</span>: area[i],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;bbox&quot;</span>: <span class="hljs-built_in">list</span>(bbox[i]),
<span class="hljs-meta">... </span> }
<span class="hljs-meta">... </span> annotations.append(new_ann)
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> annotations<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-cpkzfa">これで、画像と注釈の変換を組み合わせてサンプルのバッチで使用できるようになりました。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># transforming a batch</span>
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">transform_aug_ann</span>(<span class="hljs-params">examples</span>):
<span class="hljs-meta">... </span> image_ids = examples[<span class="hljs-string">&quot;image_id&quot;</span>]
<span class="hljs-meta">... </span> images, bboxes, area, categories = [], [], [], []
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> image, objects <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(examples[<span class="hljs-string">&quot;image&quot;</span>], examples[<span class="hljs-string">&quot;objects&quot;</span>]):
<span class="hljs-meta">... </span> image = np.array(image.convert(<span class="hljs-string">&quot;RGB&quot;</span>))[:, :, ::-<span class="hljs-number">1</span>]
<span class="hljs-meta">... </span> out = transform(image=image, bboxes=objects[<span class="hljs-string">&quot;bbox&quot;</span>], category=objects[<span class="hljs-string">&quot;category&quot;</span>])
<span class="hljs-meta">... </span> area.append(objects[<span class="hljs-string">&quot;area&quot;</span>])
<span class="hljs-meta">... </span> images.append(out[<span class="hljs-string">&quot;image&quot;</span>])
<span class="hljs-meta">... </span> bboxes.append(out[<span class="hljs-string">&quot;bboxes&quot;</span>])
<span class="hljs-meta">... </span> categories.append(out[<span class="hljs-string">&quot;category&quot;</span>])
<span class="hljs-meta">... </span> targets = [
<span class="hljs-meta">... </span> {<span class="hljs-string">&quot;image_id&quot;</span>: id_, <span class="hljs-string">&quot;annotations&quot;</span>: formatted_anns(id_, cat_, ar_, box_)}
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> id_, cat_, ar_, box_ <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(image_ids, categories, area, bboxes)
<span class="hljs-meta">... </span> ]
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> image_processor(images=images, annotations=targets, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-fnbbog">🤗 Datasets <code>with_transform</code> メソッドを使用して、この前処理関数をデータセット全体に適用します。この方法が適用されるのは、
データセットの要素を読み込むときに、その場で変換します。</p> <p data-svelte-h="svelte-1ddlogf">この時点で、データセットの例が変換後にどのようになるかを確認できます。テンソルが表示されるはずです
<code>pixel_values</code>、テンソルと <code>pixel_mask</code>、および <code>labels</code> を使用します。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>cppe5[<span class="hljs-string">&quot;train&quot;</span>] = cppe5[<span class="hljs-string">&quot;train&quot;</span>].with_transform(transform_aug_ann)
<span class="hljs-meta">&gt;&gt;&gt; </span>cppe5[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-number">15</span>]
{<span class="hljs-string">&#x27;pixel_values&#x27;</span>: tensor([[[ <span class="hljs-number">0.9132</span>, <span class="hljs-number">0.9132</span>, <span class="hljs-number">0.9132</span>, ..., -<span class="hljs-number">1.9809</span>, -<span class="hljs-number">1.9809</span>, -<span class="hljs-number">1.9809</span>],
[ <span class="hljs-number">0.9132</span>, <span class="hljs-number">0.9132</span>, <span class="hljs-number">0.9132</span>, ..., -<span class="hljs-number">1.9809</span>, -<span class="hljs-number">1.9809</span>, -<span class="hljs-number">1.9809</span>],
[ <span class="hljs-number">0.9132</span>, <span class="hljs-number">0.9132</span>, <span class="hljs-number">0.9132</span>, ..., -<span class="hljs-number">1.9638</span>, -<span class="hljs-number">1.9638</span>, -<span class="hljs-number">1.9638</span>],
...,
[-<span class="hljs-number">1.5699</span>, -<span class="hljs-number">1.5699</span>, -<span class="hljs-number">1.5699</span>, ..., -<span class="hljs-number">1.9980</span>, -<span class="hljs-number">1.9980</span>, -<span class="hljs-number">1.9980</span>],
[-<span class="hljs-number">1.5528</span>, -<span class="hljs-number">1.5528</span>, -<span class="hljs-number">1.5528</span>, ..., -<span class="hljs-number">1.9980</span>, -<span class="hljs-number">1.9809</span>, -<span class="hljs-number">1.9809</span>],
[-<span class="hljs-number">1.5528</span>, -<span class="hljs-number">1.5528</span>, -<span class="hljs-number">1.5528</span>, ..., -<span class="hljs-number">1.9980</span>, -<span class="hljs-number">1.9809</span>, -<span class="hljs-number">1.9809</span>]],
[[ <span class="hljs-number">1.3081</span>, <span class="hljs-number">1.3081</span>, <span class="hljs-number">1.3081</span>, ..., -<span class="hljs-number">1.8431</span>, -<span class="hljs-number">1.8431</span>, -<span class="hljs-number">1.8431</span>],
[ <span class="hljs-number">1.3081</span>, <span class="hljs-number">1.3081</span>, <span class="hljs-number">1.3081</span>, ..., -<span class="hljs-number">1.8431</span>, -<span class="hljs-number">1.8431</span>, -<span class="hljs-number">1.8431</span>],
[ <span class="hljs-number">1.3081</span>, <span class="hljs-number">1.3081</span>, <span class="hljs-number">1.3081</span>, ..., -<span class="hljs-number">1.8256</span>, -<span class="hljs-number">1.8256</span>, -<span class="hljs-number">1.8256</span>],
...,
[-<span class="hljs-number">1.3179</span>, -<span class="hljs-number">1.3179</span>, -<span class="hljs-number">1.3179</span>, ..., -<span class="hljs-number">1.8606</span>, -<span class="hljs-number">1.8606</span>, -<span class="hljs-number">1.8606</span>],
[-<span class="hljs-number">1.3004</span>, -<span class="hljs-number">1.3004</span>, -<span class="hljs-number">1.3004</span>, ..., -<span class="hljs-number">1.8606</span>, -<span class="hljs-number">1.8431</span>, -<span class="hljs-number">1.8431</span>],
[-<span class="hljs-number">1.3004</span>, -<span class="hljs-number">1.3004</span>, -<span class="hljs-number">1.3004</span>, ..., -<span class="hljs-number">1.8606</span>, -<span class="hljs-number">1.8431</span>, -<span class="hljs-number">1.8431</span>]],
[[ <span class="hljs-number">1.4200</span>, <span class="hljs-number">1.4200</span>, <span class="hljs-number">1.4200</span>, ..., -<span class="hljs-number">1.6476</span>, -<span class="hljs-number">1.6476</span>, -<span class="hljs-number">1.6476</span>],
[ <span class="hljs-number">1.4200</span>, <span class="hljs-number">1.4200</span>, <span class="hljs-number">1.4200</span>, ..., -<span class="hljs-number">1.6476</span>, -<span class="hljs-number">1.6476</span>, -<span class="hljs-number">1.6476</span>],
[ <span class="hljs-number">1.4200</span>, <span class="hljs-number">1.4200</span>, <span class="hljs-number">1.4200</span>, ..., -<span class="hljs-number">1.6302</span>, -<span class="hljs-number">1.6302</span>, -<span class="hljs-number">1.6302</span>],
...,
[-<span class="hljs-number">1.0201</span>, -<span class="hljs-number">1.0201</span>, -<span class="hljs-number">1.0201</span>, ..., -<span class="hljs-number">1.5604</span>, -<span class="hljs-number">1.5604</span>, -<span class="hljs-number">1.5604</span>],
[-<span class="hljs-number">1.0027</span>, -<span class="hljs-number">1.0027</span>, -<span class="hljs-number">1.0027</span>, ..., -<span class="hljs-number">1.5604</span>, -<span class="hljs-number">1.5430</span>, -<span class="hljs-number">1.5430</span>],
[-<span class="hljs-number">1.0027</span>, -<span class="hljs-number">1.0027</span>, -<span class="hljs-number">1.0027</span>, ..., -<span class="hljs-number">1.5604</span>, -<span class="hljs-number">1.5430</span>, -<span class="hljs-number">1.5430</span>]]]),
<span class="hljs-string">&#x27;pixel_mask&#x27;</span>: tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, ..., <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, ..., <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, ..., <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
...,
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, ..., <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, ..., <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, ..., <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]]),
<span class="hljs-string">&#x27;labels&#x27;</span>: {<span class="hljs-string">&#x27;size&#x27;</span>: tensor([<span class="hljs-number">800</span>, <span class="hljs-number">800</span>]), <span class="hljs-string">&#x27;image_id&#x27;</span>: tensor([<span class="hljs-number">756</span>]), <span class="hljs-string">&#x27;class_labels&#x27;</span>: tensor([<span class="hljs-number">4</span>]), <span class="hljs-string">&#x27;boxes&#x27;</span>: tensor([[<span class="hljs-number">0.7340</span>, <span class="hljs-number">0.6986</span>, <span class="hljs-number">0.3414</span>, <span class="hljs-number">0.5944</span>]]), <span class="hljs-string">&#x27;area&#x27;</span>: tensor([<span class="hljs-number">519544.4375</span>]), <span class="hljs-string">&#x27;iscrowd&#x27;</span>: tensor([<span class="hljs-number">0</span>]), <span class="hljs-string">&#x27;orig_size&#x27;</span>: tensor([<span class="hljs-number">480</span>, <span class="hljs-number">480</span>])}}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-lzm515">個々の画像を正常に拡張し、それらの注釈を準備しました。ただし、前処理はそうではありません。
まだ完成しています。最後のステップでは、画像をバッチ処理するためのカスタム <code>collat​​e_fn</code> を作成します。
画像 (現在は <code>pixel_values</code>) をバッチ内の最大の画像にパディングし、対応する <code>pixel_mask</code> を作成します
どのピクセルが実数 (1) で、どのピクセルがパディング (0) であるかを示します。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">collate_fn</span>(<span class="hljs-params">batch</span>):
<span class="hljs-meta">... </span> pixel_values = [item[<span class="hljs-string">&quot;pixel_values&quot;</span>] <span class="hljs-keyword">for</span> item <span class="hljs-keyword">in</span> batch]
<span class="hljs-meta">... </span> encoding = image_processor.pad(pixel_values, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-meta">... </span> labels = [item[<span class="hljs-string">&quot;labels&quot;</span>] <span class="hljs-keyword">for</span> item <span class="hljs-keyword">in</span> batch]
<span class="hljs-meta">... </span> batch = {}
<span class="hljs-meta">... </span> batch[<span class="hljs-string">&quot;pixel_values&quot;</span>] = encoding[<span class="hljs-string">&quot;pixel_values&quot;</span>]
<span class="hljs-meta">... </span> batch[<span class="hljs-string">&quot;pixel_mask&quot;</span>] = encoding[<span class="hljs-string">&quot;pixel_mask&quot;</span>]
<span class="hljs-meta">... </span> batch[<span class="hljs-string">&quot;labels&quot;</span>] = labels
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> batch<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="training-the-detr-model" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#training-the-detr-model"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Training the DETR model</span></h2> <p data-svelte-h="svelte-1144ckg">前のセクションで重労働のほとんどを完了したので、モデルをトレーニングする準備が整いました。
このデータセット内の画像は、サイズを変更した後でも依然として非常に大きいです。これは、このモデルを微調整すると、
少なくとも 1 つの GPU が必要です。</p> <p data-svelte-h="svelte-cqdmyi">トレーニングには次の手順が含まれます。</p> <ol data-svelte-h="svelte-1da4rlp"><li>前処理と同じチェックポイントを使用して、<a href="/docs/transformers/main/ja/model_doc/auto#transformers.AutoModelForObjectDetection">AutoModelForObjectDetection</a> でモデルを読み込みます。</li> <li><a href="/docs/transformers/main/ja/main_classes/trainer#transformers.TrainingArguments">TrainingArguments</a> でトレーニング ハイパーパラメータを定義します。</li> <li>トレーニング引数をモデル、データセット、画像プロセッサ、データ照合器とともに <a href="/docs/transformers/main/ja/main_classes/trainer#transformers.Trainer">Trainer</a> に渡します。</li> <li><a href="/docs/transformers/main/ja/main_classes/trainer#transformers.Trainer.train">train()</a> を呼び出してモデルを微調整します。</li></ol> <p data-svelte-h="svelte-1j2te6e">前処理に使用したのと同じチェックポイントからモデルをロードするときは、必ず<code>label2id</code>を渡してください。
および <code>id2label</code> マップは、以前にデータセットのメタデータから作成したものです。さらに、<code>ignore_mismatched_sizes=True</code>を指定して、既存の分類頭部を新しい分類頭部に置き換えます。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForObjectDetection
<span class="hljs-meta">&gt;&gt;&gt; </span>model = AutoModelForObjectDetection.from_pretrained(
<span class="hljs-meta">... </span> checkpoint,
<span class="hljs-meta">... </span> id2label=id2label,
<span class="hljs-meta">... </span> label2id=label2id,
<span class="hljs-meta">... </span> ignore_mismatched_sizes=<span class="hljs-literal">True</span>,
<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-f1ek5p"><a href="/docs/transformers/main/ja/main_classes/trainer#transformers.TrainingArguments">TrainingArguments</a> で、<code>output_dir</code> を使用してモデルの保存場所を指定し、必要に応じてハイパーパラメーターを構成します。
画像列が削除されるため、未使用の列を削除しないことが重要です。画像列がないと、
<code>pixel_values</code> を作成できません。このため、<code>remove_unused_columns</code><code>False</code>に設定します。
ハブにプッシュしてモデルを共有したい場合は、<code>push_to_hub</code><code>True</code> に設定します (Hugging にサインインする必要があります)
顔に向かってモデルをアップロードします)。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> TrainingArguments
<span class="hljs-meta">&gt;&gt;&gt; </span>training_args = TrainingArguments(
<span class="hljs-meta">... </span> output_dir=<span class="hljs-string">&quot;detr-resnet-50_finetuned_cppe5&quot;</span>,
<span class="hljs-meta">... </span> per_device_train_batch_size=<span class="hljs-number">8</span>,
<span class="hljs-meta">... </span> num_train_epochs=<span class="hljs-number">10</span>,
<span class="hljs-meta">... </span> fp16=<span class="hljs-literal">True</span>,
<span class="hljs-meta">... </span> save_steps=<span class="hljs-number">200</span>,
<span class="hljs-meta">... </span> logging_steps=<span class="hljs-number">50</span>,
<span class="hljs-meta">... </span> learning_rate=<span class="hljs-number">1e-5</span>,
<span class="hljs-meta">... </span> weight_decay=<span class="hljs-number">1e-4</span>,
<span class="hljs-meta">... </span> save_total_limit=<span class="hljs-number">2</span>,
<span class="hljs-meta">... </span> remove_unused_columns=<span class="hljs-literal">False</span>,
<span class="hljs-meta">... </span> push_to_hub=<span class="hljs-literal">True</span>,
<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-432r7p">最後に、すべてをまとめて、<a href="/docs/transformers/main/ja/main_classes/trainer#transformers.Trainer.train">train()</a> を呼び出します。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Trainer
<span class="hljs-meta">&gt;&gt;&gt; </span>trainer = Trainer(
<span class="hljs-meta">... </span> model=model,
<span class="hljs-meta">... </span> args=training_args,
<span class="hljs-meta">... </span> data_collator=collate_fn,
<span class="hljs-meta">... </span> train_dataset=cppe5[<span class="hljs-string">&quot;train&quot;</span>],
<span class="hljs-meta">... </span> tokenizer=image_processor,
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ki34dy"><code>training_args</code><code>push_to_hub</code><code>True</code>に設定した場合、トレーニング チェックポイントは
ハグフェイスハブ。トレーニングが完了したら、<a href="/docs/transformers/main/ja/main_classes/trainer#transformers.Trainer.push_to_hub">push_to_hub()</a> メソッドを呼び出して、最終モデルもハブにプッシュします。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>trainer.push_to_hub()<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="evaluate" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#evaluate"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Evaluate</span></h2> <p data-svelte-h="svelte-1f6htse">物体検出モデルは通常、一連の <a href="https://cocodataset.org/#detection-eval">COCO スタイルの指標</a>を使用して評価されます。
既存のメトリクス実装のいずれかを使用できますが、ここでは<code>torchvision</code>のメトリクス実装を使用して最終的なメトリクスを評価します。
ハブにプッシュしたモデル。</p> <p data-svelte-h="svelte-1urhuq1"><code>torchvision</code>エバリュエーターを使用するには、グラウンド トゥルース COCO データセットを準備する必要があります。 COCO データセットを構築するための API
データを特定の形式で保存する必要があるため、最初に画像と注釈をディスクに保存する必要があります。と同じように
トレーニング用にデータを準備するとき、<code>cppe5[&quot;test&quot;]</code> からの注釈をフォーマットする必要があります。ただし、画像
そのままでいるべきです。</p> <p data-svelte-h="svelte-4fq9ze">評価ステップには少し作業が必要ですが、大きく 3 つのステップに分けることができます。
まず、<code>cppe5[&quot;test&quot;]</code> セットを準備します。注釈をフォーマットし、データをディスクに保存します。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> json
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># format annotations the same as for training, no need for data augmentation</span>
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">val_formatted_anns</span>(<span class="hljs-params">image_id, objects</span>):
<span class="hljs-meta">... </span> annotations = []
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">0</span>, <span class="hljs-built_in">len</span>(objects[<span class="hljs-string">&quot;id&quot;</span>])):
<span class="hljs-meta">... </span> new_ann = {
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;id&quot;</span>: objects[<span class="hljs-string">&quot;id&quot;</span>][i],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;category_id&quot;</span>: objects[<span class="hljs-string">&quot;category&quot;</span>][i],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;iscrowd&quot;</span>: <span class="hljs-number">0</span>,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;image_id&quot;</span>: image_id,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;area&quot;</span>: objects[<span class="hljs-string">&quot;area&quot;</span>][i],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;bbox&quot;</span>: objects[<span class="hljs-string">&quot;bbox&quot;</span>][i],
<span class="hljs-meta">... </span> }
<span class="hljs-meta">... </span> annotations.append(new_ann)
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> annotations
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Save images and annotations into the files torchvision.datasets.CocoDetection expects</span>
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">save_cppe5_annotation_file_images</span>(<span class="hljs-params">cppe5</span>):
<span class="hljs-meta">... </span> output_json = {}
<span class="hljs-meta">... </span> path_output_cppe5 = <span class="hljs-string">f&quot;<span class="hljs-subst">{os.getcwd()}</span>/cppe5/&quot;</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> os.path.exists(path_output_cppe5):
<span class="hljs-meta">... </span> os.makedirs(path_output_cppe5)
<span class="hljs-meta">... </span> path_anno = os.path.join(path_output_cppe5, <span class="hljs-string">&quot;cppe5_ann.json&quot;</span>)
<span class="hljs-meta">... </span> categories_json = [{<span class="hljs-string">&quot;supercategory&quot;</span>: <span class="hljs-string">&quot;none&quot;</span>, <span class="hljs-string">&quot;id&quot;</span>: <span class="hljs-built_in">id</span>, <span class="hljs-string">&quot;name&quot;</span>: id2label[<span class="hljs-built_in">id</span>]} <span class="hljs-keyword">for</span> <span class="hljs-built_in">id</span> <span class="hljs-keyword">in</span> id2label]
<span class="hljs-meta">... </span> output_json[<span class="hljs-string">&quot;images&quot;</span>] = []
<span class="hljs-meta">... </span> output_json[<span class="hljs-string">&quot;annotations&quot;</span>] = []
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> example <span class="hljs-keyword">in</span> cppe5:
<span class="hljs-meta">... </span> ann = val_formatted_anns(example[<span class="hljs-string">&quot;image_id&quot;</span>], example[<span class="hljs-string">&quot;objects&quot;</span>])
<span class="hljs-meta">... </span> output_json[<span class="hljs-string">&quot;images&quot;</span>].append(
<span class="hljs-meta">... </span> {
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;id&quot;</span>: example[<span class="hljs-string">&quot;image_id&quot;</span>],
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;width&quot;</span>: example[<span class="hljs-string">&quot;image&quot;</span>].width,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;height&quot;</span>: example[<span class="hljs-string">&quot;image&quot;</span>].height,
<span class="hljs-meta">... </span> <span class="hljs-string">&quot;file_name&quot;</span>: <span class="hljs-string">f&quot;<span class="hljs-subst">{example[<span class="hljs-string">&#x27;image_id&#x27;</span>]}</span>.png&quot;</span>,
<span class="hljs-meta">... </span> }
<span class="hljs-meta">... </span> )
<span class="hljs-meta">... </span> output_json[<span class="hljs-string">&quot;annotations&quot;</span>].extend(ann)
<span class="hljs-meta">... </span> output_json[<span class="hljs-string">&quot;categories&quot;</span>] = categories_json
<span class="hljs-meta">... </span> <span class="hljs-keyword">with</span> <span class="hljs-built_in">open</span>(path_anno, <span class="hljs-string">&quot;w&quot;</span>) <span class="hljs-keyword">as</span> file:
<span class="hljs-meta">... </span> json.dump(output_json, file, ensure_ascii=<span class="hljs-literal">False</span>, indent=<span class="hljs-number">4</span>)
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> im, img_id <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(cppe5[<span class="hljs-string">&quot;image&quot;</span>], cppe5[<span class="hljs-string">&quot;image_id&quot;</span>]):
<span class="hljs-meta">... </span> path_img = os.path.join(path_output_cppe5, <span class="hljs-string">f&quot;<span class="hljs-subst">{img_id}</span>.png&quot;</span>)
<span class="hljs-meta">... </span> im.save(path_img)
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> path_output_cppe5, path_anno<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1pucslt">次に、<code>cocoevaluator</code>で利用できる<code>CocoDetection</code>クラスのインスタンスを用意します。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torchvision
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">class</span> <span class="hljs-title class_">CocoDetection</span>(torchvision.datasets.CocoDetection):
<span class="hljs-meta">... </span> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__init__</span>(<span class="hljs-params">self, img_folder, image_processor, ann_file</span>):
<span class="hljs-meta">... </span> <span class="hljs-built_in">super</span>().__init__(img_folder, ann_file)
<span class="hljs-meta">... </span> self.image_processor = image_processor
<span class="hljs-meta">... </span> <span class="hljs-keyword">def</span> <span class="hljs-title function_">__getitem__</span>(<span class="hljs-params">self, idx</span>):
<span class="hljs-meta">... </span> <span class="hljs-comment"># read in PIL image and target in COCO format</span>
<span class="hljs-meta">... </span> img, target = <span class="hljs-built_in">super</span>(CocoDetection, self).__getitem__(idx)
<span class="hljs-meta">... </span> <span class="hljs-comment"># preprocess image and target: converting target to DETR format,</span>
<span class="hljs-meta">... </span> <span class="hljs-comment"># resizing + normalization of both image and target)</span>
<span class="hljs-meta">... </span> image_id = self.ids[idx]
<span class="hljs-meta">... </span> target = {<span class="hljs-string">&quot;image_id&quot;</span>: image_id, <span class="hljs-string">&quot;annotations&quot;</span>: target}
<span class="hljs-meta">... </span> encoding = self.image_processor(images=img, annotations=target, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-meta">... </span> pixel_values = encoding[<span class="hljs-string">&quot;pixel_values&quot;</span>].squeeze() <span class="hljs-comment"># remove batch dimension</span>
<span class="hljs-meta">... </span> target = encoding[<span class="hljs-string">&quot;labels&quot;</span>][<span class="hljs-number">0</span>] <span class="hljs-comment"># remove batch dimension</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> {<span class="hljs-string">&quot;pixel_values&quot;</span>: pixel_values, <span class="hljs-string">&quot;labels&quot;</span>: target}
<span class="hljs-meta">&gt;&gt;&gt; </span>im_processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">&quot;devonho/detr-resnet-50_finetuned_cppe5&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>path_output_cppe5, path_anno = save_cppe5_annotation_file_images(cppe5[<span class="hljs-string">&quot;test&quot;</span>])
<span class="hljs-meta">&gt;&gt;&gt; </span>test_ds_coco_format = CocoDetection(path_output_cppe5, im_processor, path_anno)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-b0b8dt">最後に、メトリクスをロードして評価を実行します。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> evaluate
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> tqdm <span class="hljs-keyword">import</span> tqdm
<span class="hljs-meta">&gt;&gt;&gt; </span>model = AutoModelForObjectDetection.from_pretrained(<span class="hljs-string">&quot;devonho/detr-resnet-50_finetuned_cppe5&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>module = evaluate.load(<span class="hljs-string">&quot;ybelkada/cocoevaluate&quot;</span>, coco=test_ds_coco_format.coco)
<span class="hljs-meta">&gt;&gt;&gt; </span>val_dataloader = torch.utils.data.DataLoader(
<span class="hljs-meta">... </span> test_ds_coco_format, batch_size=<span class="hljs-number">8</span>, shuffle=<span class="hljs-literal">False</span>, num_workers=<span class="hljs-number">4</span>, collate_fn=collate_fn
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> idx, batch <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(tqdm(val_dataloader)):
<span class="hljs-meta">... </span> pixel_values = batch[<span class="hljs-string">&quot;pixel_values&quot;</span>]
<span class="hljs-meta">... </span> pixel_mask = batch[<span class="hljs-string">&quot;pixel_mask&quot;</span>]
<span class="hljs-meta">... </span> labels = [
<span class="hljs-meta">... </span> {k: v <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> t.items()} <span class="hljs-keyword">for</span> t <span class="hljs-keyword">in</span> batch[<span class="hljs-string">&quot;labels&quot;</span>]
<span class="hljs-meta">... </span> ] <span class="hljs-comment"># these are in DETR format, resized + normalized</span>
<span class="hljs-meta">... </span> <span class="hljs-comment"># forward pass</span>
<span class="hljs-meta">... </span> outputs = model(pixel_values=pixel_values, pixel_mask=pixel_mask)
<span class="hljs-meta">... </span> orig_target_sizes = torch.stack([target[<span class="hljs-string">&quot;orig_size&quot;</span>] <span class="hljs-keyword">for</span> target <span class="hljs-keyword">in</span> labels], dim=<span class="hljs-number">0</span>)
<span class="hljs-meta">... </span> results = im_processor.post_process(outputs, orig_target_sizes) <span class="hljs-comment"># convert outputs of model to Pascal VOC format (xmin, ymin, xmax, ymax)</span>
<span class="hljs-meta">... </span> module.add(prediction=results, reference=labels)
<span class="hljs-meta">... </span> <span class="hljs-keyword">del</span> batch
<span class="hljs-meta">&gt;&gt;&gt; </span>results = module.compute()
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(results)
Accumulating evaluation results...
DONE (t=<span class="hljs-number">0.08</span>s).
IoU metric: bbox
Average Precision (AP) @[ IoU=<span class="hljs-number">0.50</span>:<span class="hljs-number">0.95</span> | area= <span class="hljs-built_in">all</span> | maxDets=<span class="hljs-number">100</span> ] = <span class="hljs-number">0.352</span>
Average Precision (AP) @[ IoU=<span class="hljs-number">0.50</span> | area= <span class="hljs-built_in">all</span> | maxDets=<span class="hljs-number">100</span> ] = <span class="hljs-number">0.681</span>
Average Precision (AP) @[ IoU=<span class="hljs-number">0.75</span> | area= <span class="hljs-built_in">all</span> | maxDets=<span class="hljs-number">100</span> ] = <span class="hljs-number">0.292</span>
Average Precision (AP) @[ IoU=<span class="hljs-number">0.50</span>:<span class="hljs-number">0.95</span> | area= small | maxDets=<span class="hljs-number">100</span> ] = <span class="hljs-number">0.168</span>
Average Precision (AP) @[ IoU=<span class="hljs-number">0.50</span>:<span class="hljs-number">0.95</span> | area=medium | maxDets=<span class="hljs-number">100</span> ] = <span class="hljs-number">0.208</span>
Average Precision (AP) @[ IoU=<span class="hljs-number">0.50</span>:<span class="hljs-number">0.95</span> | area= large | maxDets=<span class="hljs-number">100</span> ] = <span class="hljs-number">0.429</span>
Average Recall (AR) @[ IoU=<span class="hljs-number">0.50</span>:<span class="hljs-number">0.95</span> | area= <span class="hljs-built_in">all</span> | maxDets= <span class="hljs-number">1</span> ] = <span class="hljs-number">0.274</span>
Average Recall (AR) @[ IoU=<span class="hljs-number">0.50</span>:<span class="hljs-number">0.95</span> | area= <span class="hljs-built_in">all</span> | maxDets= <span class="hljs-number">10</span> ] = <span class="hljs-number">0.484</span>
Average Recall (AR) @[ IoU=<span class="hljs-number">0.50</span>:<span class="hljs-number">0.95</span> | area= <span class="hljs-built_in">all</span> | maxDets=<span class="hljs-number">100</span> ] = <span class="hljs-number">0.501</span>
Average Recall (AR) @[ IoU=<span class="hljs-number">0.50</span>:<span class="hljs-number">0.95</span> | area= small | maxDets=<span class="hljs-number">100</span> ] = <span class="hljs-number">0.191</span>
Average Recall (AR) @[ IoU=<span class="hljs-number">0.50</span>:<span class="hljs-number">0.95</span> | area=medium | maxDets=<span class="hljs-number">100</span> ] = <span class="hljs-number">0.323</span>
Average Recall (AR) @[ IoU=<span class="hljs-number">0.50</span>:<span class="hljs-number">0.95</span> | area= large | maxDets=<span class="hljs-number">100</span> ] = <span class="hljs-number">0.590</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-4n33ma">これらの結果は、<a href="/docs/transformers/main/ja/main_classes/trainer#transformers.TrainingArguments">TrainingArguments</a> のハイパーパラメータを調整することでさらに改善できます。試してごらん!</p> <h2 class="relative group"><a id="inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Inference</span></h2> <p data-svelte-h="svelte-1l6wk9y">DETR モデルを微調整して評価し、Hugging Face Hub にアップロードしたので、それを推論に使用できます。
推論用に微調整されたモデルを試す最も簡単な方法は、それを <a href="/docs/transformers/main/ja/main_classes/pipelines#transformers.pipeline">pipeline()</a> で使用することです。パイプラインをインスタンス化する
モデルを使用してオブジェクトを検出し、それに画像を渡します。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> requests
<span class="hljs-meta">&gt;&gt;&gt; </span>url = <span class="hljs-string">&quot;https://i.imgur.com/2lnWoly.jpg&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>image = Image.<span class="hljs-built_in">open</span>(requests.get(url, stream=<span class="hljs-literal">True</span>).raw)
<span class="hljs-meta">&gt;&gt;&gt; </span>obj_detector = pipeline(<span class="hljs-string">&quot;object-detection&quot;</span>, model=<span class="hljs-string">&quot;devonho/detr-resnet-50_finetuned_cppe5&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>obj_detector(image)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19p6rx9">必要に応じて、パイプラインの結果を手動で複製することもできます。</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>image_processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">&quot;devonho/detr-resnet-50_finetuned_cppe5&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>model = AutoModelForObjectDetection.from_pretrained(<span class="hljs-string">&quot;devonho/detr-resnet-50_finetuned_cppe5&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-meta">... </span> inputs = image_processor(images=image, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-meta">... </span> outputs = model(**inputs)
<span class="hljs-meta">... </span> target_sizes = torch.tensor([image.size[::-<span class="hljs-number">1</span>]])
<span class="hljs-meta">... </span> results = image_processor.post_process_object_detection(outputs, threshold=<span class="hljs-number">0.5</span>, target_sizes=target_sizes)[<span class="hljs-number">0</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">for</span> score, label, box <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(results[<span class="hljs-string">&quot;scores&quot;</span>], results[<span class="hljs-string">&quot;labels&quot;</span>], results[<span class="hljs-string">&quot;boxes&quot;</span>]):
<span class="hljs-meta">... </span> box = [<span class="hljs-built_in">round</span>(i, <span class="hljs-number">2</span>) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> box.tolist()]
<span class="hljs-meta">... </span> <span class="hljs-built_in">print</span>(
<span class="hljs-meta">... </span> <span class="hljs-string">f&quot;Detected <span class="hljs-subst">{model.config.id2label[label.item()]}</span> with confidence &quot;</span>
<span class="hljs-meta">... </span> <span class="hljs-string">f&quot;<span class="hljs-subst">{<span class="hljs-built_in">round</span>(score.item(), <span class="hljs-number">3</span>)}</span> at location <span class="hljs-subst">{box}</span>&quot;</span>
<span class="hljs-meta">... </span> )
Detected Coverall <span class="hljs-keyword">with</span> confidence <span class="hljs-number">0.566</span> at location [<span class="hljs-number">1215.32</span>, <span class="hljs-number">147.38</span>, <span class="hljs-number">4401.81</span>, <span class="hljs-number">3227.08</span>]
Detected Mask <span class="hljs-keyword">with</span> confidence <span class="hljs-number">0.584</span> at location [<span class="hljs-number">2449.06</span>, <span class="hljs-number">823.19</span>, <span class="hljs-number">3256.43</span>, <span class="hljs-number">1413.9</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12ncpbb">結果をプロットしてみましょう:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>draw = ImageDraw.Draw(image)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">for</span> score, label, box <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(results[<span class="hljs-string">&quot;scores&quot;</span>], results[<span class="hljs-string">&quot;labels&quot;</span>], results[<span class="hljs-string">&quot;boxes&quot;</span>]):
<span class="hljs-meta">... </span> box = [<span class="hljs-built_in">round</span>(i, <span class="hljs-number">2</span>) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> box.tolist()]
<span class="hljs-meta">... </span> x, y, x2, y2 = <span class="hljs-built_in">tuple</span>(box)
<span class="hljs-meta">... </span> draw.rectangle((x, y, x2, y2), outline=<span class="hljs-string">&quot;red&quot;</span>, width=<span class="hljs-number">1</span>)
<span class="hljs-meta">... </span> draw.text((x, y), model.config.id2label[label.item()], fill=<span class="hljs-string">&quot;white&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>image<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-16oi5q2"><img src="https://i.imgur.com/4QZnf9A.png" alt="Object detection result on a new image"></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ja/tasks/object_detection.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_jement = {
assets: "/docs/transformers/main/ja",
base: "/docs/transformers/main/ja",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/transformers/main/ja/_app/immutable/entry/start.1486e459.js"),
import("/docs/transformers/main/ja/_app/immutable/entry/app.d9ae818f.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 147],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
99.2 kB
·
Xet hash:
90ea41c1e0d40e22713e0396df4d28aefdd583c87306132251a0e0032f51a615

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.