Buckets:

rtrm's picture
download
raw
85.6 kB
import{s as Vt,f as Bt,o as Gt,n as Ie}from"../chunks/scheduler.9bc65507.js";import{S as Xt,i as Qt,g as c,s as a,r as f,A as Yt,h as l,f as o,c as r,j as J,u as g,x as h,k as $,y as d,a as i,v as u,d as _,t as b,w}from"../chunks/index.707bf1b6.js";import{T as Ht}from"../chunks/Tip.c2ecdbf4.js";import{D as H}from"../chunks/Docstring.570ddb64.js";import{C as xt}from"../chunks/CodeBlock.54a9f38d.js";import{E as Tt}from"../chunks/ExampleCodeBlock.bc2be6bc.js";import{H as _e,E as Kt}from"../chunks/EditOnGithub.922df6ba.js";function eo(z){let n,y="Examples:",m,p,v;return p=new xt({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERldGFDb25maWclMkMlMjBEZXRhTW9kZWwlMEElMEElMjMlMjBJbml0aWFsaXppbmclMjBhJTIwREVUQSUyMFNlbnNlVGltZSUyRmRlZm9ybWFibGUtZGV0ciUyMHN0eWxlJTIwY29uZmlndXJhdGlvbiUwQWNvbmZpZ3VyYXRpb24lMjAlM0QlMjBEZXRhQ29uZmlnKCklMEElMEElMjMlMjBJbml0aWFsaXppbmclMjBhJTIwbW9kZWwlMjAod2l0aCUyMHJhbmRvbSUyMHdlaWdodHMpJTIwZnJvbSUyMHRoZSUyMFNlbnNlVGltZSUyRmRlZm9ybWFibGUtZGV0ciUyMHN0eWxlJTIwY29uZmlndXJhdGlvbiUwQW1vZGVsJTIwJTNEJTIwRGV0YU1vZGVsKGNvbmZpZ3VyYXRpb24pJTBBJTBBJTIzJTIwQWNjZXNzaW5nJTIwdGhlJTIwbW9kZWwlMjBjb25maWd1cmF0aW9uJTBBY29uZmlndXJhdGlvbiUyMCUzRCUyMG1vZGVsLmNvbmZpZw==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DetaConfig, DetaModel
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Initializing a DETA SenseTime/deformable-detr style configuration</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>configuration = DetaConfig()
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Initializing a model (with random weights) from the SenseTime/deformable-detr style configuration</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>model = DetaModel(configuration)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># Accessing the model configuration</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>configuration = model.config`,wrap:!1}}),{c(){n=c("p"),n.textContent=y,m=a(),f(p.$$.fragment)},l(s){n=l(s,"P",{"data-svelte-h":!0}),h(n)!=="svelte-kvfsh7"&&(n.textContent=y),m=r(s),g(p.$$.fragment,s)},m(s,T){i(s,n,T),i(s,m,T),u(p,s,T),v=!0},p:Ie,i(s){v||(_(p.$$.fragment,s),v=!0)},o(s){b(p.$$.fragment,s),v=!1},d(s){s&&(o(n),o(m)),w(p,s)}}}function to(z){let n,y=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
instance afterwards instead of this since the former takes care of running the pre and post processing steps while
the latter silently ignores them.`;return{c(){n=c("p"),n.innerHTML=y},l(m){n=l(m,"P",{"data-svelte-h":!0}),h(n)!=="svelte-fincs2"&&(n.innerHTML=y)},m(m,p){i(m,n,p)},p:Ie,d(m){m&&o(n)}}}function oo(z){let n,y="Examples:",m,p,v;return p=new xt({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9JbWFnZVByb2Nlc3NvciUyQyUyMERldGFNb2RlbCUwQWZyb20lMjBQSUwlMjBpbXBvcnQlMjBJbWFnZSUwQWltcG9ydCUyMHJlcXVlc3RzJTBBJTBBdXJsJTIwJTNEJTIwJTIyaHR0cCUzQSUyRiUyRmltYWdlcy5jb2NvZGF0YXNldC5vcmclMkZ2YWwyMDE3JTJGMDAwMDAwMDM5NzY5LmpwZyUyMiUwQWltYWdlJTIwJTNEJTIwSW1hZ2Uub3BlbihyZXF1ZXN0cy5nZXQodXJsJTJDJTIwc3RyZWFtJTNEVHJ1ZSkucmF3KSUwQSUwQWltYWdlX3Byb2Nlc3NvciUyMCUzRCUyMEF1dG9JbWFnZVByb2Nlc3Nvci5mcm9tX3ByZXRyYWluZWQoJTIyam96aGFuZzk3JTJGZGV0YS1zd2luLWxhcmdlLW8zNjUlMjIpJTBBbW9kZWwlMjAlM0QlMjBEZXRhTW9kZWwuZnJvbV9wcmV0cmFpbmVkKCUyMmpvemhhbmc5NyUyRmRldGEtc3dpbi1sYXJnZS1vMzY1JTIyJTJDJTIwdHdvX3N0YWdlJTNERmFsc2UpJTBBJTBBaW5wdXRzJTIwJTNEJTIwaW1hZ2VfcHJvY2Vzc29yKGltYWdlcyUzRGltYWdlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEElMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMpJTBBJTBBbGFzdF9oaWRkZW5fc3RhdGVzJTIwJTNEJTIwb3V0cHV0cy5sYXN0X2hpZGRlbl9zdGF0ZSUwQWxpc3QobGFzdF9oaWRkZW5fc3RhdGVzLnNoYXBlKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoImageProcessor, DetaModel
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> requests
<span class="hljs-meta">&gt;&gt;&gt; </span>url = <span class="hljs-string">&quot;http://images.cocodataset.org/val2017/000000039769.jpg&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>image = Image.<span class="hljs-built_in">open</span>(requests.get(url, stream=<span class="hljs-literal">True</span>).raw)
<span class="hljs-meta">&gt;&gt;&gt; </span>image_processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">&quot;jozhang97/deta-swin-large-o365&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>model = DetaModel.from_pretrained(<span class="hljs-string">&quot;jozhang97/deta-swin-large-o365&quot;</span>, two_stage=<span class="hljs-literal">False</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>inputs = image_processor(images=image, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>outputs = model(**inputs)
<span class="hljs-meta">&gt;&gt;&gt; </span>last_hidden_states = outputs.last_hidden_state
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">list</span>(last_hidden_states.shape)
[<span class="hljs-number">1</span>, <span class="hljs-number">900</span>, <span class="hljs-number">256</span>]`,wrap:!1}}),{c(){n=c("p"),n.textContent=y,m=a(),f(p.$$.fragment)},l(s){n=l(s,"P",{"data-svelte-h":!0}),h(n)!=="svelte-kvfsh7"&&(n.textContent=y),m=r(s),g(p.$$.fragment,s)},m(s,T){i(s,n,T),i(s,m,T),u(p,s,T),v=!0},p:Ie,i(s){v||(_(p.$$.fragment,s),v=!0)},o(s){b(p.$$.fragment,s),v=!1},d(s){s&&(o(n),o(m)),w(p,s)}}}function no(z){let n,y=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
instance afterwards instead of this since the former takes care of running the pre and post processing steps while
the latter silently ignores them.`;return{c(){n=c("p"),n.innerHTML=y},l(m){n=l(m,"P",{"data-svelte-h":!0}),h(n)!=="svelte-fincs2"&&(n.innerHTML=y)},m(m,p){i(m,n,p)},p:Ie,d(m){m&&o(n)}}}function so(z){let n,y="Examples:",m,p,v;return p=new xt({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9JbWFnZVByb2Nlc3NvciUyQyUyMERldGFGb3JPYmplY3REZXRlY3Rpb24lMEFmcm9tJTIwUElMJTIwaW1wb3J0JTIwSW1hZ2UlMEFpbXBvcnQlMjByZXF1ZXN0cyUwQSUwQXVybCUyMCUzRCUyMCUyMmh0dHAlM0ElMkYlMkZpbWFnZXMuY29jb2RhdGFzZXQub3JnJTJGdmFsMjAxNyUyRjAwMDAwMDAzOTc2OS5qcGclMjIlMEFpbWFnZSUyMCUzRCUyMEltYWdlLm9wZW4ocmVxdWVzdHMuZ2V0KHVybCUyQyUyMHN0cmVhbSUzRFRydWUpLnJhdyklMEElMEFpbWFnZV9wcm9jZXNzb3IlMjAlM0QlMjBBdXRvSW1hZ2VQcm9jZXNzb3IuZnJvbV9wcmV0cmFpbmVkKCUyMmpvemhhbmc5NyUyRmRldGEtc3dpbi1sYXJnZSUyMiklMEFtb2RlbCUyMCUzRCUyMERldGFGb3JPYmplY3REZXRlY3Rpb24uZnJvbV9wcmV0cmFpbmVkKCUyMmpvemhhbmc5NyUyRmRldGEtc3dpbi1sYXJnZSUyMiklMEElMEFpbnB1dHMlMjAlM0QlMjBpbWFnZV9wcm9jZXNzb3IoaW1hZ2VzJTNEaW1hZ2UlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyKSUwQW91dHB1dHMlMjAlM0QlMjBtb2RlbCgqKmlucHV0cyklMEElMEElMjMlMjBjb252ZXJ0JTIwb3V0cHV0cyUyMChib3VuZGluZyUyMGJveGVzJTIwYW5kJTIwY2xhc3MlMjBsb2dpdHMpJTIwdG8lMjBQYXNjYWwlMjBWT0MlMjBmb3JtYXQlMjAoeG1pbiUyQyUyMHltaW4lMkMlMjB4bWF4JTJDJTIweW1heCklMEF0YXJnZXRfc2l6ZXMlMjAlM0QlMjB0b3JjaC50ZW5zb3IoJTVCaW1hZ2Uuc2l6ZSU1QiUzQSUzQS0xJTVEJTVEKSUwQXJlc3VsdHMlMjAlM0QlMjBpbWFnZV9wcm9jZXNzb3IucG9zdF9wcm9jZXNzX29iamVjdF9kZXRlY3Rpb24ob3V0cHV0cyUyQyUyMHRocmVzaG9sZCUzRDAuNSUyQyUyMHRhcmdldF9zaXplcyUzRHRhcmdldF9zaXplcyklNUIlMEElMjAlMjAlMjAlMjAwJTBBJTVEJTBBZm9yJTIwc2NvcmUlMkMlMjBsYWJlbCUyQyUyMGJveCUyMGluJTIwemlwKHJlc3VsdHMlNUIlMjJzY29yZXMlMjIlNUQlMkMlMjByZXN1bHRzJTVCJTIybGFiZWxzJTIyJTVEJTJDJTIwcmVzdWx0cyU1QiUyMmJveGVzJTIyJTVEKSUzQSUwQSUyMCUyMCUyMCUyMGJveCUyMCUzRCUyMCU1QnJvdW5kKGklMkMlMjAyKSUyMGZvciUyMGklMjBpbiUyMGJveC50b2xpc3QoKSU1RCUwQSUyMCUyMCUyMCUyMHByaW50KCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGYlMjJEZXRlY3RlZCUyMCU3Qm1vZGVsLmNvbmZpZy5pZDJsYWJlbCU1QmxhYmVsLml0ZW0oKSU1RCU3RCUyMHdpdGglMjBjb25maWRlbmNlJTIwJTIyJTBBJTIwJTIw
JTIwJTIwJTIwJTIwJTIwJTIwZiUyMiU3QnJvdW5kKHNjb3JlLml0ZW0oKSUyQyUyMDMpJTdEJTIwYXQlMjBsb2NhdGlvbiUyMCU3QmJveCU3RCUyMiUwQSUyMCUyMCUyMCUyMCk=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoImageProcessor, DetaForObjectDetection
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> requests
<span class="hljs-meta">&gt;&gt;&gt; </span>url = <span class="hljs-string">&quot;http://images.cocodataset.org/val2017/000000039769.jpg&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>image = Image.<span class="hljs-built_in">open</span>(requests.get(url, stream=<span class="hljs-literal">True</span>).raw)
<span class="hljs-meta">&gt;&gt;&gt; </span>image_processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">&quot;jozhang97/deta-swin-large&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>model = DetaForObjectDetection.from_pretrained(<span class="hljs-string">&quot;jozhang97/deta-swin-large&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>inputs = image_processor(images=image, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>outputs = model(**inputs)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>target_sizes = torch.tensor([image.size[::-<span class="hljs-number">1</span>]])
<span class="hljs-meta">&gt;&gt;&gt; </span>results = image_processor.post_process_object_detection(outputs, threshold=<span class="hljs-number">0.5</span>, target_sizes=target_sizes)[
<span class="hljs-meta">... </span> <span class="hljs-number">0</span>
<span class="hljs-meta">... </span>]
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">for</span> score, label, box <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(results[<span class="hljs-string">&quot;scores&quot;</span>], results[<span class="hljs-string">&quot;labels&quot;</span>], results[<span class="hljs-string">&quot;boxes&quot;</span>]):
<span class="hljs-meta">... </span> box = [<span class="hljs-built_in">round</span>(i, <span class="hljs-number">2</span>) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> box.tolist()]
<span class="hljs-meta">... </span> <span class="hljs-built_in">print</span>(
<span class="hljs-meta">... </span> <span class="hljs-string">f&quot;Detected <span class="hljs-subst">{model.config.id2label[label.item()]}</span> with confidence &quot;</span>
<span class="hljs-meta">... </span> <span class="hljs-string">f&quot;<span class="hljs-subst">{<span class="hljs-built_in">round</span>(score.item(), <span class="hljs-number">3</span>)}</span> at location <span class="hljs-subst">{box}</span>&quot;</span>
<span class="hljs-meta">... </span> )
Detected cat <span class="hljs-keyword">with</span> confidence <span class="hljs-number">0.802</span> at location [<span class="hljs-number">9.87</span>, <span class="hljs-number">54.36</span>, <span class="hljs-number">316.93</span>, <span class="hljs-number">473.44</span>]
Detected cat <span class="hljs-keyword">with</span> confidence <span class="hljs-number">0.795</span> at location [<span class="hljs-number">346.62</span>, <span class="hljs-number">24.35</span>, <span class="hljs-number">639.62</span>, <span class="hljs-number">373.2</span>]
Detected remote <span class="hljs-keyword">with</span> confidence <span class="hljs-number">0.725</span> at location [<span class="hljs-number">40.41</span>, <span class="hljs-number">73.36</span>, <span class="hljs-number">175.77</span>, <span class="hljs-number">117.29</span>]
Detected remote <span class="hljs-keyword">with</span> confidence <span class="hljs-number">0.638</span> at location [<span class="hljs-number">333.34</span>, <span class="hljs-number">76.81</span>, <span class="hljs-number">370.22</span>, <span class="hljs-number">187.94</span>]
Detected couch <span class="hljs-keyword">with</span> confidence <span class="hljs-number">0.584</span> at location [<span class="hljs-number">0.03</span>, <span class="hljs-number">0.99</span>, <span class="hljs-number">640.02</span>, <span class="hljs-number">474.93</span>]`,wrap:!1}}),{c(){n=c("p"),n.textContent=y,m=a(),f(p.$$.fragment)},l(s){n=l(s,"P",{"data-svelte-h":!0}),h(n)!=="svelte-kvfsh7"&&(n.textContent=y),m=r(s),g(p.$$.fragment,s)},m(s,T){i(s,n,T),i(s,m,T),u(p,s,T),v=!0},p:Ie,i(s){v||(_(p.$$.fragment,s),v=!0)},o(s){b(p.$$.fragment,s),v=!1},d(s){s&&(o(n),o(m)),w(p,s)}}}function ao(z){let n,y,m,p,v,s,T,qe,V,Mt=`DETA モデルは、<a href="https://arxiv.org/abs/2212.06137" rel="nofollow">NMS Strikes Back</a> で Jeffrey Ouyang-Zhang、Jang Hyun Cho、Xingyi Zhou、Philipp Krähenbühl によって提案されました。
DETA (Detection Transformers with Assignment の略) は、1 対 1 の 2 部ハンガリアン マッチング損失を置き換えることにより、<a href="deformable_detr">Deformable DETR</a> を改善します。
非最大抑制 (NMS) を備えた従来の検出器で使用される 1 対多のラベル割り当てを使用します。これにより、最大 2.5 mAP の大幅な増加が得られます。`,Fe,B,Dt="論文の要約は次のとおりです。",Je,G,jt="<em>Detection Transformer (DETR) は、トレーニング中に 1 対 1 の 2 部マッチングを使用してクエリを一意のオブジェクトに直接変換し、エンドツーエンドのオブジェクト検出を可能にします。最近、これらのモデルは、紛れもない優雅さで COCO の従来の検出器を上回りました。ただし、モデル アーキテクチャやトレーニング スケジュールなど、さまざまな設計において従来の検出器とは異なるため、1 対 1 マッチングの有効性は完全には理解されていません。この研究では、DETR での 1 対 1 のハンガリー語マッチングと、非最大監視 (NMS) を備えた従来の検出器での 1 対多のラベル割り当てとの間の厳密な比較を行います。驚くべきことに、NMS を使用した 1 対多の割り当ては、同じ設定の下で標準的な 1 対 1 のマッチングよりも一貫して優れており、最大 2.5 mAP という大幅な向上が見られます。従来の IoU ベースのラベル割り当てを使用して Deformable-DETR をトレーニングする当社の検出器は、ResNet50 バックボーンを使用して 12 エポック (1x スケジュール) 以内に 50.2 COCO mAP を達成し、この設定で既存のすべての従来の検出器またはトランスベースの検出器を上回りました。複数のデータセット、スケジュール、アーキテクチャに関して、私たちは一貫して、パフォーマンスの高い検出トランスフォーマーには二部マッチングが不要であることを示しています。さらに、検出トランスの成功は、表現力豊かなトランス アーキテクチャによるものであると考えています。</em>",Ne,N,Ct,Pe,X,$t='DETA の概要。 <a href="https://arxiv.org/abs/2212.06137">元の論文</a>から抜粋。',Ee,Q,zt=`このモデルは、<a href="https://huggingface.co/nielsr" rel="nofollow">nielsr</a> によって提供されました。
元のコードは <a href="https://github.com/jozhang97/DETA" rel="nofollow">ここ</a> にあります。`,Re,Y,Ze,K,kt="DETA の使用を開始するのに役立つ公式 Hugging Face およびコミュニティ (🌎 で示されている) リソースのリスト。",We,ee,Ut='<li>DETA のデモ ノートブックは <a href="https://github.com/NielsRogge/Transformers-Tutorials/tree/master/DETA" rel="nofollow">こちら</a> にあります。</li> <li>参照: <a href="../tasks/object_detection">オブジェクト検出タスク ガイド</a></li>',Oe,te,It="ここに含めるリソースの送信に興味がある場合は、お気軽にプル リクエストを開いてください。審査させていただきます。リソースは、既存のリソースを複製するのではなく、何か新しいものを示すことが理想的です。",Le,oe,Ae,D,ne,et,be,qt=`This is the configuration class to store the configuration of a <a href="/docs/transformers/pr_31976/ja/model_doc/deta#transformers.DetaModel">DetaModel</a>. It is used to instantiate a DETA
model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
defaults will yield a similar configuration to that of the DETA
<a href="https://huggingface.co/SenseTime/deformable-detr" rel="nofollow">SenseTime/deformable-detr</a> architecture.`,tt,we,Ft=`Configuration objects inherit from <a href="/docs/transformers/pr_31976/ja/main_classes/configuration#transformers.PretrainedConfig">PretrainedConfig</a> and can be used to control the model outputs. Read the
documentation from <a href="/docs/transformers/pr_31976/ja/main_classes/configuration#transformers.PretrainedConfig">PretrainedConfig</a> for more information.`,ot,P,Se,se,He,j,ae,nt,ve,Jt="Constructs a Deformable DETR image processor.",st,E,re,at,ye,Nt="Preprocess an image or a batch of images so that it can be used by the model.",rt,R,ie,it,Te,Pt=`Converts the output of <a href="/docs/transformers/pr_31976/ja/model_doc/deta#transformers.DetaForObjectDetection">DetaForObjectDetection</a> into final bounding boxes in (top_left_x, top_left_y,
bottom_right_x, bottom_right_y) format. Only supports PyTorch.`,Ve,de,Be,x,ce,dt,xe,Et=`The bare DETA Model (consisting of a backbone and encoder-decoder Transformer) outputting raw hidden-states without
any specific head on top.`,ct,Me,Rt=`This model inherits from <a href="/docs/transformers/pr_31976/ja/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the
library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads
etc.)`,lt,De,Zt=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass.
Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matters related to general usage
and behavior.`,mt,k,le,pt,je,Wt='The <a href="/docs/transformers/pr_31976/ja/model_doc/deta#transformers.DetaModel">DetaModel</a> forward method, overrides the <code>__call__</code> special method.',ht,Z,ft,W,Ge,me,Xe,M,pe,gt,Ce,Ot=`DETA Model (consisting of a backbone and encoder-decoder Transformer) with object detection heads on top, for tasks
such as COCO detection.`,ut,$e,Lt=`This model inherits from <a href="/docs/transformers/pr_31976/ja/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the
library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads
etc.)`,_t,ze,At=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass.
Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matters related to general usage
and behavior.`,bt,U,he,wt,ke,St='The <a href="/docs/transformers/pr_31976/ja/model_doc/deta#transformers.DetaForObjectDetection">DetaForObjectDetection</a> forward method, overrides the <code>__call__</code> special method.',vt,O,yt,L,Qe,fe,Ye,Ue,Ke;return v=new _e({props:{title:"DETA",local:"deta",headingTag:"h1"}}),T=new _e({props:{title:"Overview",local:"overview",headingTag:"h2"}}),Y=new _e({props:{title:"Resources",local:"resources",headingTag:"h2"}}),oe=new _e({props:{title:"DetaConfig",local:"transformers.DetaConfig",headingTag:"h2"}}),ne=new H({props:{name:"class transformers.DetaConfig",anchor:"transformers.DetaConfig",parameters:[{name:"backbone_config",val:" = None"},{name:"backbone",val:" = None"},{name:"use_pretrained_backbone",val:" = False"},{name:"use_timm_backbone",val:" = False"},{name:"backbone_kwargs",val:" = None"},{name:"num_queries",val:" = 900"},{name:"max_position_embeddings",val:" = 2048"},{name:"encoder_layers",val:" = 6"},{name:"encoder_ffn_dim",val:" = 2048"},{name:"encoder_attention_heads",val:" = 8"},{name:"decoder_layers",val:" = 6"},{name:"decoder_ffn_dim",val:" = 1024"},{name:"decoder_attention_heads",val:" = 8"},{name:"encoder_layerdrop",val:" = 0.0"},{name:"is_encoder_decoder",val:" = True"},{name:"activation_function",val:" = 'relu'"},{name:"d_model",val:" = 256"},{name:"dropout",val:" = 0.1"},{name:"attention_dropout",val:" = 0.0"},{name:"activation_dropout",val:" = 0.0"},{name:"init_std",val:" = 0.02"},{name:"init_xavier_std",val:" = 1.0"},{name:"return_intermediate",val:" = True"},{name:"auxiliary_loss",val:" = False"},{name:"position_embedding_type",val:" = 'sine'"},{name:"num_feature_levels",val:" = 5"},{name:"encoder_n_points",val:" = 4"},{name:"decoder_n_points",val:" = 4"},{name:"two_stage",val:" = True"},{name:"two_stage_num_proposals",val:" = 300"},{name:"with_box_refine",val:" = True"},{name:"assign_first_stage",val:" = True"},{name:"assign_second_stage",val:" = True"},{name:"class_cost",val:" = 
1"},{name:"bbox_cost",val:" = 5"},{name:"giou_cost",val:" = 2"},{name:"mask_loss_coefficient",val:" = 1"},{name:"dice_loss_coefficient",val:" = 1"},{name:"bbox_loss_coefficient",val:" = 5"},{name:"giou_loss_coefficient",val:" = 2"},{name:"eos_coefficient",val:" = 0.1"},{name:"focal_alpha",val:" = 0.25"},{name:"disable_custom_kernels",val:" = True"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.DetaConfig.backbone_config",description:`<strong>backbone_config</strong> (<code>PretrainedConfig</code> or <code>dict</code>, <em>optional</em>, defaults to <code>ResNetConfig()</code>) &#x2014;
The configuration of the backbone model.`,name:"backbone_config"},{anchor:"transformers.DetaConfig.backbone",description:`<strong>backbone</strong> (<code>str</code>, <em>optional</em>) &#x2014;
Name of backbone to use when <code>backbone_config</code> is <code>None</code>. If <code>use_pretrained_backbone</code> is <code>True</code>, this
will load the corresponding pretrained weights from the timm or transformers library. If <code>use_pretrained_backbone</code>
is <code>False</code>, this loads the backbone&#x2019;s config and uses that to initialize the backbone with random weights.`,name:"backbone"},{anchor:"transformers.DetaConfig.use_pretrained_backbone",description:`<strong>use_pretrained_backbone</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Whether to use pretrained weights for the backbone.`,name:"use_pretrained_backbone"},{anchor:"transformers.DetaConfig.use_timm_backbone",description:`<strong>use_timm_backbone</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Whether to load <code>backbone</code> from the timm library. If <code>False</code>, the backbone is loaded from the transformers
library.`,name:"use_timm_backbone"},{anchor:"transformers.DetaConfig.backbone_kwargs",description:`<strong>backbone_kwargs</strong> (<code>dict</code>, <em>optional</em>) &#x2014;
Keyword arguments to be passed to AutoBackbone when loading from a checkpoint
e.g. <code>{&apos;out_indices&apos;: (0, 1, 2, 3)}</code>. Cannot be specified if <code>backbone_config</code> is set.`,name:"backbone_kwargs"},{anchor:"transformers.DetaConfig.num_queries",description:`<strong>num_queries</strong> (<code>int</code>, <em>optional</em>, defaults to 900) &#x2014;
Number of object queries, i.e. detection slots. This is the maximal number of objects <a href="/docs/transformers/pr_31976/ja/model_doc/deta#transformers.DetaModel">DetaModel</a> can
detect in a single image. In case <code>two_stage</code> is set to <code>True</code>, we use <code>two_stage_num_proposals</code> instead.`,name:"num_queries"},{anchor:"transformers.DetaConfig.d_model",description:`<strong>d_model</strong> (<code>int</code>, <em>optional</em>, defaults to 256) &#x2014;
Dimension of the layers.`,name:"d_model"},{anchor:"transformers.DetaConfig.encoder_layers",description:`<strong>encoder_layers</strong> (<code>int</code>, <em>optional</em>, defaults to 6) &#x2014;
Number of encoder layers.`,name:"encoder_layers"},{anchor:"transformers.DetaConfig.decoder_layers",description:`<strong>decoder_layers</strong> (<code>int</code>, <em>optional</em>, defaults to 6) &#x2014;
Number of decoder layers.`,name:"decoder_layers"},{anchor:"transformers.DetaConfig.encoder_attention_heads",description:`<strong>encoder_attention_heads</strong> (<code>int</code>, <em>optional</em>, defaults to 8) &#x2014;
Number of attention heads for each attention layer in the Transformer encoder.`,name:"encoder_attention_heads"},{anchor:"transformers.DetaConfig.decoder_attention_heads",description:`<strong>decoder_attention_heads</strong> (<code>int</code>, <em>optional</em>, defaults to 8) &#x2014;
Number of attention heads for each attention layer in the Transformer decoder.`,name:"decoder_attention_heads"},{anchor:"transformers.DetaConfig.decoder_ffn_dim",description:`<strong>decoder_ffn_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 1024) &#x2014;
Dimension of the &#x201C;intermediate&#x201D; (often named feed-forward) layer in decoder.`,name:"decoder_ffn_dim"},{anchor:"transformers.DetaConfig.encoder_ffn_dim",description:`<strong>encoder_ffn_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 2048) &#x2014;
Dimension of the &#x201C;intermediate&#x201D; (often named feed-forward) layer in the encoder.`,name:"encoder_ffn_dim"},{anchor:"transformers.DetaConfig.activation_function",description:`<strong>activation_function</strong> (<code>str</code> or <code>function</code>, <em>optional</em>, defaults to <code>&quot;relu&quot;</code>) &#x2014;
The non-linear activation function (function or string) in the encoder and pooler. If string, <code>&quot;gelu&quot;</code>,
<code>&quot;relu&quot;</code>, <code>&quot;silu&quot;</code> and <code>&quot;gelu_new&quot;</code> are supported.`,name:"activation_function"},{anchor:"transformers.DetaConfig.dropout",description:`<strong>dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) &#x2014;
The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.`,name:"dropout"},{anchor:"transformers.DetaConfig.attention_dropout",description:`<strong>attention_dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) &#x2014;
The dropout ratio for the attention probabilities.`,name:"attention_dropout"},{anchor:"transformers.DetaConfig.activation_dropout",description:`<strong>activation_dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) &#x2014;
The dropout ratio for activations inside the fully connected layer.`,name:"activation_dropout"},{anchor:"transformers.DetaConfig.init_std",description:`<strong>init_std</strong> (<code>float</code>, <em>optional</em>, defaults to 0.02) &#x2014;
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.`,name:"init_std"},{anchor:"transformers.DetaConfig.init_xavier_std",description:`<strong>init_xavier_std</strong> (<code>float</code>, <em>optional</em>, defaults to 1) &#x2014;
The scaling factor used for the Xavier initialization gain in the HM Attention map module.`,name:"init_xavier_std"},{anchor:"transformers.DetaConfig.encoder_layerdrop",description:`<strong>encoder_layerdrop</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) &#x2014;
The LayerDrop probability for the encoder. See the <a href="https://arxiv.org/abs/1909.11556" rel="nofollow">LayerDrop paper</a>
for more details.`,name:"encoder_layerdrop"},{anchor:"transformers.DetaConfig.auxiliary_loss",description:`<strong>auxiliary_loss</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) &#x2014;
Whether auxiliary decoding losses (loss at each decoder layer) are to be used.`,name:"auxiliary_loss"},{anchor:"transformers.DetaConfig.position_embedding_type",description:`<strong>position_embedding_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;sine&quot;</code>) &#x2014;
Type of position embeddings to be used on top of the image features. One of <code>&quot;sine&quot;</code> or <code>&quot;learned&quot;</code>.`,name:"position_embedding_type"},{anchor:"transformers.DetaConfig.class_cost",description:`<strong>class_cost</strong> (<code>float</code>, <em>optional</em>, defaults to 1) &#x2014;
Relative weight of the classification error in the Hungarian matching cost.`,name:"class_cost"},{anchor:"transformers.DetaConfig.bbox_cost",description:`<strong>bbox_cost</strong> (<code>float</code>, <em>optional</em>, defaults to 5) &#x2014;
Relative weight of the L1 error of the bounding box coordinates in the Hungarian matching cost.`,name:"bbox_cost"},{anchor:"transformers.DetaConfig.giou_cost",description:`<strong>giou_cost</strong> (<code>float</code>, <em>optional</em>, defaults to 2) &#x2014;
Relative weight of the generalized IoU loss of the bounding box in the Hungarian matching cost.`,name:"giou_cost"},{anchor:"transformers.DetaConfig.mask_loss_coefficient",description:`<strong>mask_loss_coefficient</strong> (<code>float</code>, <em>optional</em>, defaults to 1) &#x2014;
Relative weight of the Focal loss in the panoptic segmentation loss.`,name:"mask_loss_coefficient"},{anchor:"transformers.DetaConfig.dice_loss_coefficient",description:`<strong>dice_loss_coefficient</strong> (<code>float</code>, <em>optional</em>, defaults to 1) &#x2014;
Relative weight of the DICE/F-1 loss in the panoptic segmentation loss.`,name:"dice_loss_coefficient"},{anchor:"transformers.DetaConfig.bbox_loss_coefficient",description:`<strong>bbox_loss_coefficient</strong> (<code>float</code>, <em>optional</em>, defaults to 5) &#x2014;
Relative weight of the L1 bounding box loss in the object detection loss.`,name:"bbox_loss_coefficient"},{anchor:"transformers.DetaConfig.giou_loss_coefficient",description:`<strong>giou_loss_coefficient</strong> (<code>float</code>, <em>optional</em>, defaults to 2) &#x2014;
Relative weight of the generalized IoU loss in the object detection loss.`,name:"giou_loss_coefficient"},{anchor:"transformers.DetaConfig.eos_coefficient",description:`<strong>eos_coefficient</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) &#x2014;
Relative classification weight of the &#x2018;no-object&#x2019; class in the object detection loss.`,name:"eos_coefficient"},{anchor:"transformers.DetaConfig.num_feature_levels",description:`<strong>num_feature_levels</strong> (<code>int</code>, <em>optional</em>, defaults to 5) &#x2014;
The number of input feature levels.`,name:"num_feature_levels"},{anchor:"transformers.DetaConfig.encoder_n_points",description:`<strong>encoder_n_points</strong> (<code>int</code>, <em>optional</em>, defaults to 4) &#x2014;
The number of sampled keys in each feature level for each attention head in the encoder.`,name:"encoder_n_points"},{anchor:"transformers.DetaConfig.decoder_n_points",description:`<strong>decoder_n_points</strong> (<code>int</code>, <em>optional</em>, defaults to 4) &#x2014;
The number of sampled keys in each feature level for each attention head in the decoder.`,name:"decoder_n_points"},{anchor:"transformers.DetaConfig.two_stage",description:`<strong>two_stage</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to apply a two-stage deformable DETR, where the region proposals are also generated by a variant of
DETA, which are further fed into the decoder for iterative bounding box refinement.`,name:"two_stage"},{anchor:"transformers.DetaConfig.two_stage_num_proposals",description:`<strong>two_stage_num_proposals</strong> (<code>int</code>, <em>optional</em>, defaults to 300) &#x2014;
The number of region proposals to be generated, in case <code>two_stage</code> is set to <code>True</code>.`,name:"two_stage_num_proposals"},{anchor:"transformers.DetaConfig.with_box_refine",description:`<strong>with_box_refine</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to apply iterative bounding box refinement, where each decoder layer refines the bounding boxes
based on the predictions from the previous layer.`,name:"with_box_refine"},{anchor:"transformers.DetaConfig.focal_alpha",description:`<strong>focal_alpha</strong> (<code>float</code>, <em>optional</em>, defaults to 0.25) &#x2014;
Alpha parameter in the focal loss.`,name:"focal_alpha"},{anchor:"transformers.DetaConfig.assign_first_stage",description:`<strong>assign_first_stage</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to assign each prediction i to the highest overlapping ground truth object if the overlap is larger than a threshold 0.7.`,name:"assign_first_stage"},{anchor:"transformers.DetaConfig.assign_second_stage",description:`<strong>assign_second_stage</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Whether to use a second-stage assignment procedure that closely follows the first-stage assignment procedure.`,name:"assign_second_stage"},{anchor:"transformers.DetaConfig.disable_custom_kernels",description:`<strong>disable_custom_kernels</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Disable the use of custom CUDA and CPU kernels. This option is necessary for the ONNX export, as custom
kernels are not supported by PyTorch ONNX export.`,name:"disable_custom_kernels"}],source:"https://github.com/huggingface/transformers/blob/vr_31976/src/transformers/models/deprecated/deta/configuration_deta.py#L25"}}),P=new Tt({props:{anchor:"transformers.DetaConfig.example",$$slots:{default:[eo]},$$scope:{ctx:z}}}),se=new _e({props:{title:"DetaImageProcessor",local:"transformers.DetaImageProcessor",headingTag:"h2"}}),ae=new H({props:{name:"class transformers.DetaImageProcessor",anchor:"transformers.DetaImageProcessor",parameters:[{name:"format",val:": Union = <AnnotationFormat.COCO_DETECTION: 'coco_detection'>"},{name:"do_resize",val:": bool = True"},{name:"size",val:": Dict = None"},{name:"resample",val:": Resampling = <Resampling.BILINEAR: 2>"},{name:"do_rescale",val:": bool = True"},{name:"rescale_factor",val:": Union = 0.00392156862745098"},{name:"do_normalize",val:": bool = True"},{name:"image_mean",val:": Union = None"},{name:"image_std",val:": Union = None"},{name:"do_convert_annotations",val:": bool = True"},{name:"do_pad",val:": bool = True"},{name:"pad_size",val:": Optional = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.DetaImageProcessor.format",description:`<strong>format</strong> (<code>str</code>, <em>optional</em>, defaults to <code>&quot;coco_detection&quot;</code>) &#x2014;
Data format of the annotations. One of &#x201C;coco_detection&#x201D; or &#x201C;coco_panoptic&#x201D;.`,name:"format"},{anchor:"transformers.DetaImageProcessor.do_resize",description:`<strong>do_resize</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Controls whether to resize the image&#x2019;s (height, width) dimensions to the specified <code>size</code>. Can be
overridden by the <code>do_resize</code> parameter in the <code>preprocess</code> method.`,name:"do_resize"},{anchor:"transformers.DetaImageProcessor.size",description:`<strong>size</strong> (<code>Dict[str, int]</code>, <em>optional</em>, defaults to <code>{&quot;shortest_edge&quot;: 800, &quot;longest_edge&quot;: 1333}</code>) &#x2014;
Size of the image&#x2019;s <code>(height, width)</code> dimensions after resizing. Can be overridden by the <code>size</code> parameter
in the <code>preprocess</code> method. Available options are:<ul>
<li><code>{&quot;height&quot;: int, &quot;width&quot;: int}</code>: The image will be resized to the exact size <code>(height, width)</code>.
Do NOT keep the aspect ratio.</li>
<li><code>{&quot;shortest_edge&quot;: int, &quot;longest_edge&quot;: int}</code>: The image will be resized to a maximum size respecting
the aspect ratio and keeping the shortest edge less or equal to <code>shortest_edge</code> and the longest edge
less or equal to <code>longest_edge</code>.</li>
<li><code>{&quot;max_height&quot;: int, &quot;max_width&quot;: int}</code>: The image will be resized to the maximum size respecting the
aspect ratio and keeping the height less or equal to <code>max_height</code> and the width less or equal to
<code>max_width</code>.</li>
</ul>`,name:"size"},{anchor:"transformers.DetaImageProcessor.resample",description:`<strong>resample</strong> (<code>PILImageResampling</code>, <em>optional</em>, defaults to <code>PILImageResampling.BILINEAR</code>) &#x2014;
Resampling filter to use if resizing the image.`,name:"resample"},{anchor:"transformers.DetaImageProcessor.do_rescale",description:`<strong>do_rescale</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Controls whether to rescale the image by the specified scale <code>rescale_factor</code>. Can be overridden by the
<code>do_rescale</code> parameter in the <code>preprocess</code> method.`,name:"do_rescale"},{anchor:"transformers.DetaImageProcessor.rescale_factor",description:`<strong>rescale_factor</strong> (<code>int</code> or <code>float</code>, <em>optional</em>, defaults to <code>1/255</code>) &#x2014;
Scale factor to use if rescaling the image. Can be overridden by the <code>rescale_factor</code> parameter in the
<code>preprocess</code> method.</p>
<p><strong>do_normalize</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Controls whether to normalize the image. Can be overridden by the <code>do_normalize</code> parameter in the
<code>preprocess</code> method.`,name:"rescale_factor"},{anchor:"transformers.DetaImageProcessor.image_mean",description:`<strong>image_mean</strong> (<code>float</code> or <code>List[float]</code>, <em>optional</em>, defaults to <code>IMAGENET_DEFAULT_MEAN</code>) &#x2014;
Mean values to use when normalizing the image. Can be a single value or a list of values, one for each
channel. Can be overridden by the <code>image_mean</code> parameter in the <code>preprocess</code> method.`,name:"image_mean"},{anchor:"transformers.DetaImageProcessor.image_std",description:`<strong>image_std</strong> (<code>float</code> or <code>List[float]</code>, <em>optional</em>, defaults to <code>IMAGENET_DEFAULT_STD</code>) &#x2014;
Standard deviation values to use when normalizing the image. Can be a single value or a list of values, one
for each channel. Can be overridden by the <code>image_std</code> parameter in the <code>preprocess</code> method.`,name:"image_std"},{anchor:"transformers.DetaImageProcessor.do_convert_annotations",description:`<strong>do_convert_annotations</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Controls whether to convert the annotations to the format expected by the DETR model. Converts the
bounding boxes to the format <code>(center_x, center_y, width, height)</code> and in the range <code>[0, 1]</code>.
Can be overridden by the <code>do_convert_annotations</code> parameter in the <code>preprocess</code> method.`,name:"do_convert_annotations"},{anchor:"transformers.DetaImageProcessor.do_pad",description:`<strong>do_pad</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) &#x2014;
Controls whether to pad the image. Can be overridden by the <code>do_pad</code> parameter in the <code>preprocess</code>
method. If <code>True</code>, padding will be applied to the bottom and right of the image with zeros.
If <code>pad_size</code> is provided, the image will be padded to the specified dimensions.
Otherwise, the image will be padded to the maximum height and width of the batch.`,name:"do_pad"},{anchor:"transformers.DetaImageProcessor.pad_size",description:`<strong>pad_size</strong> (<code>Dict[str, int]</code>, <em>optional</em>) &#x2014;
The size <code>{&quot;height&quot;: int, &quot;width&quot;: int}</code> to pad the images to. Must be larger than any image size
provided for preprocessing. If <code>pad_size</code> is not provided, images will be padded to the largest
height and width in the batch.`,name:"pad_size"}],source:"https://github.com/huggingface/transformers/blob/vr_31976/src/transformers/models/deprecated/deta/image_processing_deta.py#L497"}}),re=new H({props:{name:"preprocess",anchor:"transformers.DetaImageProcessor.preprocess",parameters:[{name:"images",val:": Union"},{name:"annotations",val:": Union = None"},{name:"return_segmentation_masks",val:": bool = None"},{name:"masks_path",val:": Union = None"},{name:"do_resize",val:": Optional = None"},{name:"size",val:": Optional = None"},{name:"resample",val:" = None"},{name:"do_rescale",val:": Optional = None"},{name:"rescale_factor",val:": Union = None"},{name:"do_normalize",val:": Optional = None"},{name:"image_mean",val:": Union = None"},{name:"image_std",val:": Union = None"},{name:"do_convert_annotations",val:": Optional = None"},{name:"do_pad",val:": Optional = None"},{name:"format",val:": Union = None"},{name:"return_tensors",val:": Union = None"},{name:"data_format",val:": Union = <ChannelDimension.FIRST: 'channels_first'>"},{name:"input_data_format",val:": Union = None"},{name:"pad_size",val:": Optional = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.DetaImageProcessor.preprocess.images",description:`<strong>images</strong> (<code>ImageInput</code>) &#x2014;
Image or batch of images to preprocess. Expects a single or batch of images with pixel values ranging
from 0 to 255. If passing in images with pixel values between 0 and 1, set <code>do_rescale=False</code>.`,name:"images"},{anchor:"transformers.DetaImageProcessor.preprocess.annotations",description:`<strong>annotations</strong> (<code>List[Dict]</code> or <code>List[List[Dict]]</code>, <em>optional</em>) &#x2014;
List of annotations associated with the image or batch of images. If annotation is for object
detection, the annotations should be a dictionary with the following keys:<ul>
<li>&#x201C;image_id&#x201D; (<code>int</code>): The image id.</li>
<li>&#x201C;annotations&#x201D; (<code>List[Dict]</code>): List of annotations for an image. Each annotation should be a
dictionary. An image can have no annotations, in which case the list should be empty.</li>
</ul>
If annotation is for segmentation, the annotations should be a dictionary with the following keys:<ul>
<li>&#x201C;image_id&#x201D; (<code>int</code>): The image id.</li>
<li>&#x201C;segments_info&#x201D; (<code>List[Dict]</code>): List of segments for an image. Each segment should be a dictionary.
An image can have no segments, in which case the list should be empty.</li>
<li>&#x201C;file_name&#x201D; (<code>str</code>): The file name of the image.</li>
</ul>`,name:"annotations"},{anchor:"transformers.DetaImageProcessor.preprocess.return_segmentation_masks",description:`<strong>return_segmentation_masks</strong> (<code>bool</code>, <em>optional</em>, defaults to self.return_segmentation_masks) &#x2014;
Whether to return segmentation masks.`,name:"return_segmentation_masks"},{anchor:"transformers.DetaImageProcessor.preprocess.masks_path",description:`<strong>masks_path</strong> (<code>str</code> or <code>pathlib.Path</code>, <em>optional</em>) &#x2014;
Path to the directory containing the segmentation masks.`,name:"masks_path"},{anchor:"transformers.DetaImageProcessor.preprocess.do_resize",description:`<strong>do_resize</strong> (<code>bool</code>, <em>optional</em>, defaults to self.do_resize) &#x2014;
Whether to resize the image.`,name:"do_resize"},{anchor:"transformers.DetaImageProcessor.preprocess.size",description:`<strong>size</strong> (<code>Dict[str, int]</code>, <em>optional</em>, defaults to self.size) &#x2014;
Size of the image&#x2019;s <code>(height, width)</code> dimensions after resizing. Available options are:<ul>
<li><code>{&quot;height&quot;: int, &quot;width&quot;: int}</code>: The image will be resized to the exact size <code>(height, width)</code>.
Do NOT keep the aspect ratio.</li>
<li><code>{&quot;shortest_edge&quot;: int, &quot;longest_edge&quot;: int}</code>: The image will be resized to a maximum size respecting
the aspect ratio and keeping the shortest edge less or equal to <code>shortest_edge</code> and the longest edge
less or equal to <code>longest_edge</code>.</li>
<li><code>{&quot;max_height&quot;: int, &quot;max_width&quot;: int}</code>: The image will be resized to the maximum size respecting the
aspect ratio and keeping the height less or equal to <code>max_height</code> and the width less or equal to
<code>max_width</code>.</li>
</ul>`,name:"size"},{anchor:"transformers.DetaImageProcessor.preprocess.resample",description:`<strong>resample</strong> (<code>PILImageResampling</code>, <em>optional</em>, defaults to self.resample) &#x2014;
Resampling filter to use when resizing the image.`,name:"resample"},{anchor:"transformers.DetaImageProcessor.preprocess.do_rescale",description:`<strong>do_rescale</strong> (<code>bool</code>, <em>optional</em>, defaults to self.do_rescale) &#x2014;
Whether to rescale the image.`,name:"do_rescale"},{anchor:"transformers.DetaImageProcessor.preprocess.rescale_factor",description:`<strong>rescale_factor</strong> (<code>float</code>, <em>optional</em>, defaults to self.rescale_factor) &#x2014;
Rescale factor to use when rescaling the image.`,name:"rescale_factor"},{anchor:"transformers.DetaImageProcessor.preprocess.do_normalize",description:`<strong>do_normalize</strong> (<code>bool</code>, <em>optional</em>, defaults to self.do_normalize) &#x2014;
Whether to normalize the image.`,name:"do_normalize"},{anchor:"transformers.DetaImageProcessor.preprocess.image_mean",description:`<strong>image_mean</strong> (<code>float</code> or <code>List[float]</code>, <em>optional</em>, defaults to self.image_mean) &#x2014;
Mean to use when normalizing the image.`,name:"image_mean"},{anchor:"transformers.DetaImageProcessor.preprocess.image_std",description:`<strong>image_std</strong> (<code>float</code> or <code>List[float]</code>, <em>optional</em>, defaults to self.image_std) &#x2014;
Standard deviation to use when normalizing the image.`,name:"image_std"},{anchor:"transformers.DetaImageProcessor.preprocess.do_convert_annotations",description:`<strong>do_convert_annotations</strong> (<code>bool</code>, <em>optional</em>, defaults to self.do_convert_annotations) &#x2014;
Whether to convert the annotations to the format expected by the model. Converts the bounding
boxes from the format <code>(top_left_x, top_left_y, width, height)</code> to <code>(center_x, center_y, width, height)</code>
and in relative coordinates.`,name:"do_convert_annotations"},{anchor:"transformers.DetaImageProcessor.preprocess.do_pad",description:`<strong>do_pad</strong> (<code>bool</code>, <em>optional</em>, defaults to self.do_pad) &#x2014;
Whether to pad the image. If <code>True</code>, padding will be applied to the bottom and right of
the image with zeros. If <code>pad_size</code> is provided, the image will be padded to the specified
dimensions. Otherwise, the image will be padded to the maximum height and width of the batch.`,name:"do_pad"},{anchor:"transformers.DetaImageProcessor.preprocess.format",description:`<strong>format</strong> (<code>str</code> or <code>AnnotationFormat</code>, <em>optional</em>, defaults to self.format) &#x2014;
Format of the annotations.`,name:"format"},{anchor:"transformers.DetaImageProcessor.preprocess.return_tensors",description:`<strong>return_tensors</strong> (<code>str</code> or <code>TensorType</code>, <em>optional</em>, defaults to self.return_tensors) &#x2014;
Type of tensors to return. If <code>None</code>, will return the list of images.`,name:"return_tensors"},{anchor:"transformers.DetaImageProcessor.preprocess.data_format",description:`<strong>data_format</strong> (<code>ChannelDimension</code> or <code>str</code>, <em>optional</em>, defaults to <code>ChannelDimension.FIRST</code>) &#x2014;
The channel dimension format for the output image. Can be one of:<ul>
<li><code>&quot;channels_first&quot;</code> or <code>ChannelDimension.FIRST</code>: image in (num_channels, height, width) format.</li>
<li><code>&quot;channels_last&quot;</code> or <code>ChannelDimension.LAST</code>: image in (height, width, num_channels) format.</li>
<li>Unset: Use the channel dimension format of the input image.</li>
</ul>`,name:"data_format"},{anchor:"transformers.DetaImageProcessor.preprocess.input_data_format",description:`<strong>input_data_format</strong> (<code>ChannelDimension</code> or <code>str</code>, <em>optional</em>) &#x2014;
The channel dimension format for the input image. If unset, the channel dimension format is inferred
from the input image. Can be one of:<ul>
<li><code>&quot;channels_first&quot;</code> or <code>ChannelDimension.FIRST</code>: image in (num_channels, height, width) format.</li>
<li><code>&quot;channels_last&quot;</code> or <code>ChannelDimension.LAST</code>: image in (height, width, num_channels) format.</li>
<li><code>&quot;none&quot;</code> or <code>ChannelDimension.NONE</code>: image in (height, width) format.</li>
</ul>`,name:"input_data_format"},{anchor:"transformers.DetaImageProcessor.preprocess.pad_size",description:`<strong>pad_size</strong> (<code>Dict[str, int]</code>, <em>optional</em>) &#x2014;
The size <code>{&quot;height&quot;: int, &quot;width&quot;: int}</code> to pad the images to. Must be larger than any image size
provided for preprocessing. If <code>pad_size</code> is not provided, images will be padded to the largest
height and width in the batch.`,name:"pad_size"}],source:"https://github.com/huggingface/transformers/blob/vr_31976/src/transformers/models/deprecated/deta/image_processing_deta.py#L888"}}),ie=new H({props:{name:"post_process_object_detection",anchor:"transformers.DetaImageProcessor.post_process_object_detection",parameters:[{name:"outputs",val:""},{name:"threshold",val:": float = 0.5"},{name:"target_sizes",val:": Union = None"},{name:"nms_threshold",val:": float = 0.7"}],parametersDescription:[{anchor:"transformers.DetaImageProcessor.post_process_object_detection.outputs",description:`<strong>outputs</strong> (<code>DetrObjectDetectionOutput</code>) &#x2014;
Raw outputs of the model.`,name:"outputs"},{anchor:"transformers.DetaImageProcessor.post_process_object_detection.threshold",description:`<strong>threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.5) &#x2014;
Score threshold to keep object detection predictions.`,name:"threshold"},{anchor:"transformers.DetaImageProcessor.post_process_object_detection.target_sizes",description:`<strong>target_sizes</strong> (<code>torch.Tensor</code> or <code>List[Tuple[int, int]]</code>, <em>optional</em>) &#x2014;
Tensor of shape <code>(batch_size, 2)</code> or list of tuples (<code>Tuple[int, int]</code>) containing the target size
(height, width) of each image in the batch. If left to None, predictions will not be resized.`,name:"target_sizes"},{anchor:"transformers.DetaImageProcessor.post_process_object_detection.nms_threshold",description:`<strong>nms_threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.7) &#x2014;
NMS threshold.`,name:"nms_threshold"}],source:"https://github.com/huggingface/transformers/blob/vr_31976/src/transformers/models/deprecated/deta/image_processing_deta.py#L1143",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>A list of dictionaries, each dictionary containing the scores, labels and boxes for an image
in the batch as predicted by the model.</p>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>List[Dict]</code></p>
`}}),de=new _e({props:{title:"DetaModel",local:"transformers.DetaModel",headingTag:"h2"}}),ce=new H({props:{name:"class transformers.DetaModel",anchor:"transformers.DetaModel",parameters:[{name:"config",val:": DetaConfig"}],parametersDescription:[{anchor:"transformers.DetaModel.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_31976/ja/model_doc/deta#transformers.DetaConfig">DetaConfig</a>) &#x2014;
Model configuration class with all the parameters of the model. Initializing with a config file does not
load the weights associated with the model, only the configuration. Check out the
<a href="/docs/transformers/pr_31976/ja/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_31976/src/transformers/models/deprecated/deta/modeling_deta.py#L1426"}}),le=new H({props:{name:"forward",anchor:"transformers.DetaModel.forward",parameters:[{name:"pixel_values",val:": FloatTensor"},{name:"pixel_mask",val:": Optional = None"},{name:"decoder_attention_mask",val:": Optional = None"},{name:"encoder_outputs",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"decoder_inputs_embeds",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.DetaModel.forward.pixel_values",description:`<strong>pixel_values</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_channels, height, width)</code>) &#x2014;
Pixel values. Padding will be ignored by default should you provide it.</p>
<p>Pixel values can be obtained using <a href="/docs/transformers/pr_31976/ja/model_doc/auto#transformers.AutoImageProcessor">AutoImageProcessor</a>. See <code>AutoImageProcessor.__call__()</code> for details.`,name:"pixel_values"},{anchor:"transformers.DetaModel.forward.pixel_mask",description:`<strong>pixel_mask</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, height, width)</code>, <em>optional</em>) &#x2014;
Mask to avoid performing attention on padding pixel values. Mask values selected in <code>[0, 1]</code>:</p>
<ul>
<li>1 for pixels that are real (i.e. <strong>not masked</strong>),</li>
<li>0 for pixels that are padding (i.e. <strong>masked</strong>).</li>
</ul>
<p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"pixel_mask"},{anchor:"transformers.DetaModel.forward.decoder_attention_mask",description:`<strong>decoder_attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries)</code>, <em>optional</em>) &#x2014;
Not used by default. Can be used to mask object queries.`,name:"decoder_attention_mask"},{anchor:"transformers.DetaModel.forward.encoder_outputs",description:`<strong>encoder_outputs</strong> (<code>tuple(tuple(torch.FloatTensor)</code>, <em>optional</em>) &#x2014;
Tuple consists of (<code>last_hidden_state</code>, <em>optional</em>: <code>hidden_states</code>, <em>optional</em>: <code>attentions</code>)
<code>last_hidden_state</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) is a sequence of
hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder.`,name:"encoder_outputs"},{anchor:"transformers.DetaModel.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) &#x2014;
Optionally, instead of passing the flattened feature map (output of the backbone + projection layer), you
can choose to directly pass a flattened representation of an image.`,name:"inputs_embeds"},{anchor:"transformers.DetaModel.forward.decoder_inputs_embeds",description:`<strong>decoder_inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, hidden_size)</code>, <em>optional</em>) &#x2014;
Optionally, instead of initializing the queries with a tensor of zeros, you can choose to directly pass an
embedded representation.`,name:"decoder_inputs_embeds"},{anchor:"transformers.DetaModel.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.DetaModel.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
more detail.`,name:"output_hidden_states"},{anchor:"transformers.DetaModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Whether or not to return a <a href="/docs/transformers/pr_31976/ja/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/transformers/blob/vr_31976/src/transformers/models/deprecated/deta/modeling_deta.py#L1592",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>A <code>transformers.models.deprecated.deta.modeling_deta.DetaModelOutput</code> or a tuple of
<code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various
elements depending on the configuration (<a
href="/docs/transformers/pr_31976/ja/model_doc/deta#transformers.DetaConfig"
>DetaConfig</a>) and inputs.</p>
<ul>
<li><strong>init_reference_points</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, 4)</code>) — Initial reference points sent through the Transformer decoder.</li>
<li><strong>last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, hidden_size)</code>) — Sequence of hidden-states at the output of the last layer of the decoder of the model.</li>
<li><strong>intermediate_hidden_states</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, config.decoder_layers, num_queries, hidden_size)</code>) — Stacked intermediate hidden states (output of each layer of the decoder).</li>
<li><strong>intermediate_reference_points</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, config.decoder_layers, num_queries, 4)</code>) — Stacked intermediate reference points (reference points of each layer of the decoder).</li>
<li><strong>decoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of
shape <code>(batch_size, num_queries, hidden_size)</code>. Hidden-states of the decoder at the output of each layer
plus the initial embedding outputs.</li>
<li><strong>decoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, num_queries, num_queries)</code>. Attentions weights of the decoder, after the attention softmax, used to compute the weighted
average in the self-attention heads.</li>
<li><strong>cross_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_queries, num_heads, 4, 4)</code>.
Attentions weights of the decoder’s cross-attention layer, after the attention softmax, used to compute the
weighted average in the cross-attention heads.</li>
<li><strong>encoder_last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the encoder of the model.</li>
<li><strong>encoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of
shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the encoder at the output of each
layer plus the initial embedding outputs.</li>
<li><strong>encoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_queries, num_heads, 4, 4)</code>.
Attentions weights of the encoder, after the attention softmax, used to compute the weighted average in the
self-attention heads.</li>
<li><strong>enc_outputs_class</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, config.num_labels)</code>, <em>optional</em>, returned when <code>config.with_box_refine=True</code> and <code>config.two_stage=True</code>) — Predicted bounding boxes scores where the top <code>config.two_stage_num_proposals</code> scoring bounding boxes are
picked as region proposals in the first stage. Output of bounding box binary classification (i.e.
foreground and background).</li>
<li><strong>enc_outputs_coord_logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, 4)</code>, <em>optional</em>, returned when <code>config.with_box_refine=True</code> and <code>config.two_stage=True</code>) — Logits of predicted bounding boxes coordinates in the first stage.</li>
<li><strong>output_proposals</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, 4)</code>, <em>optional</em>, returned when <code>config.two_stage=True</code>) — Logits of proposal bounding boxes coordinates in the gen_encoder_output_proposals.</li>
</ul>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>transformers.models.deprecated.deta.modeling_deta.DetaModelOutput</code> or <code>tuple(torch.FloatTensor)</code></p>
`}}),Z=new Ht({props:{$$slots:{default:[to]},$$scope:{ctx:z}}}),W=new Tt({props:{anchor:"transformers.DetaModel.forward.example",$$slots:{default:[oo]},$$scope:{ctx:z}}}),me=new _e({props:{title:"DetaForObjectDetection",local:"transformers.DetaForObjectDetection",headingTag:"h2"}}),pe=new H({props:{name:"class transformers.DetaForObjectDetection",anchor:"transformers.DetaForObjectDetection",parameters:[{name:"config",val:": DetaConfig"}],parametersDescription:[{anchor:"transformers.DetaForObjectDetection.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_31976/ja/model_doc/deta#transformers.DetaConfig">DetaConfig</a>) &#x2014;
Model configuration class with all the parameters of the model. Initializing with a config file does not
load the weights associated with the model, only the configuration. Check out the
<a href="/docs/transformers/pr_31976/ja/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_31976/src/transformers/models/deprecated/deta/modeling_deta.py#L1844"}}),he=new H({props:{name:"forward",anchor:"transformers.DetaForObjectDetection.forward",parameters:[{name:"pixel_values",val:": FloatTensor"},{name:"pixel_mask",val:": Optional = None"},{name:"decoder_attention_mask",val:": Optional = None"},{name:"encoder_outputs",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"decoder_inputs_embeds",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.DetaForObjectDetection.forward.pixel_values",description:`<strong>pixel_values</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_channels, height, width)</code>) &#x2014;
Pixel values. Padding will be ignored by default should you provide it.</p>
<p>Pixel values can be obtained using <a href="/docs/transformers/pr_31976/ja/model_doc/auto#transformers.AutoImageProcessor">AutoImageProcessor</a>. See <code>AutoImageProcessor.__call__()</code> for details.`,name:"pixel_values"},{anchor:"transformers.DetaForObjectDetection.forward.pixel_mask",description:`<strong>pixel_mask</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, height, width)</code>, <em>optional</em>) &#x2014;
Mask to avoid performing attention on padding pixel values. Mask values selected in <code>[0, 1]</code>:</p>
<ul>
<li>1 for pixels that are real (i.e. <strong>not masked</strong>),</li>
<li>0 for pixels that are padding (i.e. <strong>masked</strong>).</li>
</ul>
<p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"pixel_mask"},{anchor:"transformers.DetaForObjectDetection.forward.decoder_attention_mask",description:`<strong>decoder_attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries)</code>, <em>optional</em>) &#x2014;
Not used by default. Can be used to mask object queries.`,name:"decoder_attention_mask"},{anchor:"transformers.DetaForObjectDetection.forward.encoder_outputs",description:`<strong>encoder_outputs</strong> (<code>tuple(tuple(torch.FloatTensor)</code>, <em>optional</em>) &#x2014;
Tuple consists of (<code>last_hidden_state</code>, <em>optional</em>: <code>hidden_states</code>, <em>optional</em>: <code>attentions</code>).
<code>last_hidden_state</code> of shape <code>(batch_size, sequence_length, hidden_size)</code> (<em>optional</em>) is a sequence of
hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder.`,name:"encoder_outputs"},{anchor:"transformers.DetaForObjectDetection.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) &#x2014;
Optionally, instead of passing the flattened feature map (output of the backbone + projection layer), you
can choose to directly pass a flattened representation of an image.`,name:"inputs_embeds"},{anchor:"transformers.DetaForObjectDetection.forward.decoder_inputs_embeds",description:`<strong>decoder_inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, hidden_size)</code>, <em>optional</em>) &#x2014;
Optionally, instead of initializing the queries with a tensor of zeros, you can choose to directly pass an
embedded representation.`,name:"decoder_inputs_embeds"},{anchor:"transformers.DetaForObjectDetection.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.DetaForObjectDetection.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
more detail.`,name:"output_hidden_states"},{anchor:"transformers.DetaForObjectDetection.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) &#x2014;
Whether or not to return a <a href="/docs/transformers/pr_31976/ja/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.DetaForObjectDetection.forward.labels",description:`<strong>labels</strong> (<code>List[Dict]</code> of len <code>(batch_size,)</code>, <em>optional</em>) &#x2014;
Labels for computing the bipartite matching loss. List of dicts, each dictionary containing at least the
following 2 keys: &#x2018;class_labels&#x2019; and &#x2018;boxes&#x2019; (the class labels and bounding boxes of an image in the batch
respectively). The class labels themselves should be a <code>torch.LongTensor</code> of len <code>(number of bounding boxes in the image,)</code> and the boxes a <code>torch.FloatTensor</code> of shape <code>(number of bounding boxes in the image, 4)</code>.`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/vr_31976/src/transformers/models/deprecated/deta/modeling_deta.py#L1908",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>
<p>A <code>transformers.models.deprecated.deta.modeling_deta.DetaObjectDetectionOutput</code> or a tuple of
<code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various
elements depending on the configuration (<a
href="/docs/transformers/pr_31976/ja/model_doc/deta#transformers.DetaConfig"
>DetaConfig</a>) and inputs.</p>
<ul>
<li><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> are provided) — Total loss as a linear combination of a negative log-likelihood (cross-entropy) for class prediction and a
bounding box loss. The latter is defined as a linear combination of the L1 loss and the generalized
scale-invariant IoU loss.</li>
<li><strong>loss_dict</strong> (<code>Dict</code>, <em>optional</em>) — A dictionary containing the individual losses. Useful for logging.</li>
<li><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, num_classes + 1)</code>) — Classification logits (including no-object) for all queries.</li>
<li><strong>pred_boxes</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, 4)</code>) — Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These
values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding
possible padding). You can use <code>DetaImageProcessor.post_process_object_detection</code> to retrieve the
unnormalized bounding boxes.</li>
<li><strong>auxiliary_outputs</strong> (<code>list[Dict]</code>, <em>optional</em>) — Optional, only returned when auxiliary losses are activated (i.e. <code>config.auxiliary_loss</code> is set to <code>True</code>)
and labels are provided. It is a list of dictionaries containing the two above keys (<code>logits</code> and
<code>pred_boxes</code>) for each decoder layer.</li>
<li><strong>last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the decoder of the model.</li>
<li><strong>decoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of
shape <code>(batch_size, num_queries, hidden_size)</code>. Hidden-states of the decoder at the output of each layer
plus the initial embedding outputs.</li>
<li><strong>decoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, num_queries, num_queries)</code>. Attentions weights of the decoder, after the attention softmax, used to compute the weighted
average in the self-attention heads.</li>
<li><strong>cross_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_queries, num_heads, 4, 4)</code>.
Attention weights of the decoder’s cross-attention layer, after the attention softmax, used to compute the
weighted average in the cross-attention heads.</li>
<li><strong>encoder_last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the encoder of the model.</li>
<li><strong>encoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of
shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the encoder at the output of each
layer plus the initial embedding outputs.</li>
<li><strong>encoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, sequence_length, num_heads, 4, 4)</code>. Attentions weights of the encoder, after the attention softmax, used to compute the weighted average
in the self-attention heads.</li>
<li><strong>intermediate_hidden_states</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, config.decoder_layers, num_queries, hidden_size)</code>) — Stacked intermediate hidden states (output of each layer of the decoder).</li>
<li><strong>intermediate_reference_points</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, config.decoder_layers, num_queries, 4)</code>) — Stacked intermediate reference points (reference points of each layer of the decoder).</li>
<li><strong>init_reference_points</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, 4)</code>) — Initial reference points sent through the Transformer decoder.</li>
<li><strong>enc_outputs_class</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, config.num_labels)</code>, <em>optional</em>, returned when <code>config.with_box_refine=True</code> and <code>config.two_stage=True</code>) — Predicted bounding boxes scores where the top <code>config.two_stage_num_proposals</code> scoring bounding boxes are
picked as region proposals in the first stage. Output of bounding box binary classification (i.e.
foreground and background).</li>
<li><strong>enc_outputs_coord_logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, 4)</code>, <em>optional</em>, returned when <code>config.with_box_refine=True</code> and <code>config.two_stage=True</code>) — Logits of predicted bounding boxes coordinates in the first stage.</li>
<li><strong>output_proposals</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, 4)</code>, <em>optional</em>, returned when <code>config.two_stage=True</code>) — Logits of proposal bounding boxes coordinates in the gen_encoder_output_proposals.</li>
</ul>
`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>
<p><code>transformers.models.deprecated.deta.modeling_deta.DetaObjectDetectionOutput</code> or <code>tuple(torch.FloatTensor)</code></p>
`}}),O=new Ht({props:{$$slots:{default:[no]},$$scope:{ctx:z}}}),L=new Tt({props:{anchor:"transformers.DetaForObjectDetection.forward.example",$$slots:{default:[so]},$$scope:{ctx:z}}}),fe=new Kt({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/ja/model_doc/deta.md"}}),{c(){n=c("meta"),y=a(),m=c("p"),p=a(),f(v.$$.fragment),s=a(),f(T.$$.fragment),qe=a(),V=c("p"),V.innerHTML=Mt,Fe=a(),B=c("p"),B.textContent=Dt,Je=a(),G=c("p"),G.innerHTML=jt,Ne=a(),N=c("img"),Pe=a(),X=c("small"),X.innerHTML=$t,Ee=a(),Q=c("p"),Q.innerHTML=zt,Re=a(),f(Y.$$.fragment),Ze=a(),K=c("p"),K.textContent=kt,We=a(),ee=c("ul"),ee.innerHTML=Ut,Oe=a(),te=c("p"),te.textContent=It,Le=a(),f(oe.$$.fragment),Ae=a(),D=c("div"),f(ne.$$.fragment),et=a(),be=c("p"),be.innerHTML=qt,tt=a(),we=c("p"),we.innerHTML=Ft,ot=a(),f(P.$$.fragment),Se=a(),f(se.$$.fragment),He=a(),j=c("div"),f(ae.$$.fragment),nt=a(),ve=c("p"),ve.textContent=Jt,st=a(),E=c("div"),f(re.$$.fragment),at=a(),ye=c("p"),ye.textContent=Nt,rt=a(),R=c("div"),f(ie.$$.fragment),it=a(),Te=c("p"),Te.innerHTML=Pt,Ve=a(),f(de.$$.fragment),Be=a(),x=c("div"),f(ce.$$.fragment),dt=a(),xe=c("p"),xe.textContent=Et,ct=a(),Me=c("p"),Me.innerHTML=Rt,lt=a(),De=c("p"),De.innerHTML=Zt,mt=a(),k=c("div"),f(le.$$.fragment),pt=a(),je=c("p"),je.innerHTML=Wt,ht=a(),f(Z.$$.fragment),ft=a(),f(W.$$.fragment),Ge=a(),f(me.$$.fragment),Xe=a(),M=c("div"),f(pe.$$.fragment),gt=a(),Ce=c("p"),Ce.textContent=Ot,ut=a(),$e=c("p"),$e.innerHTML=Lt,_t=a(),ze=c("p"),ze.innerHTML=At,bt=a(),U=c("div"),f(he.$$.fragment),wt=a(),ke=c("p"),ke.innerHTML=St,vt=a(),f(O.$$.fragment),yt=a(),f(L.$$.fragment),Qe=a(),f(fe.$$.fragment),Ye=a(),Ue=c("p"),this.h()},l(e){const 
t=Yt("svelte-u9bgzb",document.head);n=l(t,"META",{name:!0,content:!0}),t.forEach(o),y=r(e),m=l(e,"P",{}),J(m).forEach(o),p=r(e),g(v.$$.fragment,e),s=r(e),g(T.$$.fragment,e),qe=r(e),V=l(e,"P",{"data-svelte-h":!0}),h(V)!=="svelte-pqtcol"&&(V.innerHTML=Mt),Fe=r(e),B=l(e,"P",{"data-svelte-h":!0}),h(B)!=="svelte-1cv3nri"&&(B.textContent=Dt),Je=r(e),G=l(e,"P",{"data-svelte-h":!0}),h(G)!=="svelte-61e8sf"&&(G.innerHTML=jt),Ne=r(e),N=l(e,"IMG",{src:!0,alt:!0,width:!0}),Pe=r(e),X=l(e,"SMALL",{"data-svelte-h":!0}),h(X)!=="svelte-d45auk"&&(X.innerHTML=$t),Ee=r(e),Q=l(e,"P",{"data-svelte-h":!0}),h(Q)!=="svelte-sqyp79"&&(Q.innerHTML=zt),Re=r(e),g(Y.$$.fragment,e),Ze=r(e),K=l(e,"P",{"data-svelte-h":!0}),h(K)!=="svelte-yu6a6k"&&(K.textContent=kt),We=r(e),ee=l(e,"UL",{"data-svelte-h":!0}),h(ee)!=="svelte-15ertcq"&&(ee.innerHTML=Ut),Oe=r(e),te=l(e,"P",{"data-svelte-h":!0}),h(te)!=="svelte-17ytafw"&&(te.textContent=It),Le=r(e),g(oe.$$.fragment,e),Ae=r(e),D=l(e,"DIV",{class:!0});var I=J(D);g(ne.$$.fragment,I),et=r(I),be=l(I,"P",{"data-svelte-h":!0}),h(be)!=="svelte-pvpbsf"&&(be.innerHTML=qt),tt=r(I),we=l(I,"P",{"data-svelte-h":!0}),h(we)!=="svelte-12eojiv"&&(we.innerHTML=Ft),ot=r(I),g(P.$$.fragment,I),I.forEach(o),Se=r(e),g(se.$$.fragment,e),He=r(e),j=l(e,"DIV",{class:!0});var q=J(j);g(ae.$$.fragment,q),nt=r(q),ve=l(q,"P",{"data-svelte-h":!0}),h(ve)!=="svelte-54uec8"&&(ve.textContent=Jt),st=r(q),E=l(q,"DIV",{class:!0});var ge=J(E);g(re.$$.fragment,ge),at=r(ge),ye=l(ge,"P",{"data-svelte-h":!0}),h(ye)!=="svelte-jgz2ra"&&(ye.textContent=Nt),ge.forEach(o),rt=r(q),R=l(q,"DIV",{class:!0});var ue=J(R);g(ie.$$.fragment,ue),it=r(ue),Te=l(ue,"P",{"data-svelte-h":!0}),h(Te)!=="svelte-1chhpl1"&&(Te.innerHTML=Pt),ue.forEach(o),q.forEach(o),Ve=r(e),g(de.$$.fragment,e),Be=r(e),x=l(e,"DIV",{class:!0});var 
C=J(x);g(ce.$$.fragment,C),dt=r(C),xe=l(C,"P",{"data-svelte-h":!0}),h(xe)!=="svelte-56qjtm"&&(xe.textContent=Et),ct=r(C),Me=l(C,"P",{"data-svelte-h":!0}),h(Me)!=="svelte-149c8e4"&&(Me.innerHTML=Rt),lt=r(C),De=l(C,"P",{"data-svelte-h":!0}),h(De)!=="svelte-hswkmf"&&(De.innerHTML=Zt),mt=r(C),k=l(C,"DIV",{class:!0});var A=J(k);g(le.$$.fragment,A),pt=r(A),je=l(A,"P",{"data-svelte-h":!0}),h(je)!=="svelte-bgs1wv"&&(je.innerHTML=Wt),ht=r(A),g(Z.$$.fragment,A),ft=r(A),g(W.$$.fragment,A),A.forEach(o),C.forEach(o),Ge=r(e),g(me.$$.fragment,e),Xe=r(e),M=l(e,"DIV",{class:!0});var F=J(M);g(pe.$$.fragment,F),gt=r(F),Ce=l(F,"P",{"data-svelte-h":!0}),h(Ce)!=="svelte-ga2czr"&&(Ce.textContent=Ot),ut=r(F),$e=l(F,"P",{"data-svelte-h":!0}),h($e)!=="svelte-149c8e4"&&($e.innerHTML=Lt),_t=r(F),ze=l(F,"P",{"data-svelte-h":!0}),h(ze)!=="svelte-hswkmf"&&(ze.innerHTML=At),bt=r(F),U=l(F,"DIV",{class:!0});var S=J(U);g(he.$$.fragment,S),wt=r(S),ke=l(S,"P",{"data-svelte-h":!0}),h(ke)!=="svelte-57uwcx"&&(ke.innerHTML=St),vt=r(S),g(O.$$.fragment,S),yt=r(S),g(L.$$.fragment,S),S.forEach(o),F.forEach(o),Qe=r(e),g(fe.$$.fragment,e),Ye=r(e),Ue=l(e,"P",{}),J(Ue).forEach(o),this.h()},h(){$(n,"name","hf:doc:metadata"),$(n,"content",ro),Bt(N.src,Ct="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/model_doc/deta_architecture.jpg")||$(N,"src",Ct),$(N,"alt","drawing"),$(N,"width","600"),$(D,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(E,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(R,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(k,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(x,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8"),$(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),$(M,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){d(document.head,n),i(e,y,t),i(e,m,t),i(e,p,t),u(v,e,t),i(e,s,t),u(T,e,t),i(e,qe,t),i(e,V,t),i(e,Fe,t),i(e,B,t),i(e,Je,t),i(e,G,t),i(e,Ne,t),i(e,N,t),i(e,Pe,t),i(e,X,t),i(e,Ee,t),i(e,Q,t),i(e,Re,t),u(Y,e,t),i(e,Ze,t),i(e,K,t),i(e,We,t),i(e,ee,t),i(e,Oe,t),i(e,te,t),i(e,Le,t),u(oe,e,t),i(e,Ae,t),i(e,D,t),u(ne,D,null),d(D,et),d(D,be),d(D,tt),d(D,we),d(D,ot),u(P,D,null),i(e,Se,t),u(se,e,t),i(e,He,t),i(e,j,t),u(ae,j,null),d(j,nt),d(j,ve),d(j,st),d(j,E),u(re,E,null),d(E,at),d(E,ye),d(j,rt),d(j,R),u(ie,R,null),d(R,it),d(R,Te),i(e,Ve,t),u(de,e,t),i(e,Be,t),i(e,x,t),u(ce,x,null),d(x,dt),d(x,xe),d(x,ct),d(x,Me),d(x,lt),d(x,De),d(x,mt),d(x,k),u(le,k,null),d(k,pt),d(k,je),d(k,ht),u(Z,k,null),d(k,ft),u(W,k,null),i(e,Ge,t),u(me,e,t),i(e,Xe,t),i(e,M,t),u(pe,M,null),d(M,gt),d(M,Ce),d(M,ut),d(M,$e),d(M,_t),d(M,ze),d(M,bt),d(M,U),u(he,U,null),d(U,wt),d(U,ke),d(U,vt),u(O,U,null),d(U,yt),u(L,U,null),i(e,Qe,t),u(fe,e,t),i(e,Ye,t),i(e,Ue,t),Ke=!0},p(e,[t]){const I={};t&2&&(I.$$scope={dirty:t,ctx:e}),P.$set(I);const q={};t&2&&(q.$$scope={dirty:t,ctx:e}),Z.$set(q);const ge={};t&2&&(ge.$$scope={dirty:t,ctx:e}),W.$set(ge);const ue={};t&2&&(ue.$$scope={dirty:t,ctx:e}),O.$set(ue);const 
C={};t&2&&(C.$$scope={dirty:t,ctx:e}),L.$set(C)},i(e){Ke||(_(v.$$.fragment,e),_(T.$$.fragment,e),_(Y.$$.fragment,e),_(oe.$$.fragment,e),_(ne.$$.fragment,e),_(P.$$.fragment,e),_(se.$$.fragment,e),_(ae.$$.fragment,e),_(re.$$.fragment,e),_(ie.$$.fragment,e),_(de.$$.fragment,e),_(ce.$$.fragment,e),_(le.$$.fragment,e),_(Z.$$.fragment,e),_(W.$$.fragment,e),_(me.$$.fragment,e),_(pe.$$.fragment,e),_(he.$$.fragment,e),_(O.$$.fragment,e),_(L.$$.fragment,e),_(fe.$$.fragment,e),Ke=!0)},o(e){b(v.$$.fragment,e),b(T.$$.fragment,e),b(Y.$$.fragment,e),b(oe.$$.fragment,e),b(ne.$$.fragment,e),b(P.$$.fragment,e),b(se.$$.fragment,e),b(ae.$$.fragment,e),b(re.$$.fragment,e),b(ie.$$.fragment,e),b(de.$$.fragment,e),b(ce.$$.fragment,e),b(le.$$.fragment,e),b(Z.$$.fragment,e),b(W.$$.fragment,e),b(me.$$.fragment,e),b(pe.$$.fragment,e),b(he.$$.fragment,e),b(O.$$.fragment,e),b(L.$$.fragment,e),b(fe.$$.fragment,e),Ke=!1},d(e){e&&(o(y),o(m),o(p),o(s),o(qe),o(V),o(Fe),o(B),o(Je),o(G),o(Ne),o(N),o(Pe),o(X),o(Ee),o(Q),o(Re),o(Ze),o(K),o(We),o(ee),o(Oe),o(te),o(Le),o(Ae),o(D),o(Se),o(He),o(j),o(Ve),o(Be),o(x),o(Ge),o(Xe),o(M),o(Qe),o(Ye),o(Ue)),o(n),w(v,e),w(T,e),w(Y,e),w(oe,e),w(ne),w(P),w(se,e),w(ae),w(re),w(ie),w(de,e),w(ce),w(le),w(Z),w(W),w(me,e),w(pe),w(he),w(O),w(L),w(fe,e)}}}const ro='{"title":"DETA","local":"deta","sections":[{"title":"Overview","local":"overview","sections":[],"depth":2},{"title":"Resources","local":"resources","sections":[],"depth":2},{"title":"DetaConfig","local":"transformers.DetaConfig","sections":[],"depth":2},{"title":"DetaImageProcessor","local":"transformers.DetaImageProcessor","sections":[],"depth":2},{"title":"DetaModel","local":"transformers.DetaModel","sections":[],"depth":2},{"title":"DetaForObjectDetection","local":"transformers.DetaForObjectDetection","sections":[],"depth":2}],"depth":1}';function io(z){return Gt(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class uo extends Xt{constructor(n){super(),Qt(this,n,io,ao,Vt,{})}}export{uo as 
component};

Xet Storage Details

Size:
85.6 kB
·
Xet hash:
020ba3102ef0eb96aabcac3183af07c7ded10696b8f4f242fe1940618b776126

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.