Buckets:
| import{s as Xs,o as Ys,n as xt}from"../chunks/scheduler.25b97de1.js";import{S as Qs,i as Ks,g as i,s as n,r as h,A as er,h as d,f as t,c as s,j as v,u as p,x as m,k as x,y as r,a,v as g,d as u,t as f,w as _}from"../chunks/index.d9030fc9.js";import{T as es}from"../chunks/Tip.baa67368.js";import{D as M}from"../chunks/Docstring.e257edda.js";import{C as vt}from"../chunks/CodeBlock.e6cd0d95.js";import{E as on}from"../chunks/ExampleCodeBlock.20db4b6e.js";import{P as tr}from"../chunks/PipelineTag.5f100392.js";import{H as P,E as or}from"../chunks/EditOnGithub.91d95064.js";function nr(z){let c,T="Examples:",b,w,y;return w=new vt({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERldHJDb25maWclMkMlMjBEZXRyTW9kZWwlMEElMEElMjMlMjBJbml0aWFsaXppbmclMjBhJTIwREVUUiUyMGZhY2Vib29rJTJGZGV0ci1yZXNuZXQtNTAlMjBzdHlsZSUyMGNvbmZpZ3VyYXRpb24lMEFjb25maWd1cmF0aW9uJTIwJTNEJTIwRGV0ckNvbmZpZygpJTBBJTBBJTIzJTIwSW5pdGlhbGl6aW5nJTIwYSUyMG1vZGVsJTIwKHdpdGglMjByYW5kb20lMjB3ZWlnaHRzKSUyMGZyb20lMjB0aGUlMjBmYWNlYm9vayUyRmRldHItcmVzbmV0LTUwJTIwc3R5bGUlMjBjb25maWd1cmF0aW9uJTBBbW9kZWwlMjAlM0QlMjBEZXRyTW9kZWwoY29uZmlndXJhdGlvbiklMEElMEElMjMlMjBBY2Nlc3NpbmclMjB0aGUlMjBtb2RlbCUyMGNvbmZpZ3VyYXRpb24lMEFjb25maWd1cmF0aW9uJTIwJTNEJTIwbW9kZWwuY29uZmln",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DetrConfig, DetrModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Initializing a DETR facebook/detr-resnet-50 style configuration</span> | |
| <span class="hljs-meta">>>> </span>configuration = DetrConfig() | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Initializing a model (with random weights) from the facebook/detr-resnet-50 style configuration</span> | |
| <span class="hljs-meta">>>> </span>model = DetrModel(configuration) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Accessing the model configuration</span> | |
| <span class="hljs-meta">>>> </span>configuration = model.config`,wrap:!1}}),{c(){c=i("p"),c.textContent=T,b=n(),h(w.$$.fragment)},l(l){c=d(l,"P",{"data-svelte-h":!0}),m(c)!=="svelte-kvfsh7"&&(c.textContent=T),b=s(l),p(w.$$.fragment,l)},m(l,D){a(l,c,D),a(l,b,D),g(w,l,D),y=!0},p:xt,i(l){y||(u(w.$$.fragment,l),y=!0)},o(l){f(w.$$.fragment,l),y=!1},d(l){l&&(t(c),t(b)),_(w,l)}}}function sr(z){let c,T=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){c=i("p"),c.innerHTML=T},l(b){c=d(b,"P",{"data-svelte-h":!0}),m(c)!=="svelte-fincs2"&&(c.innerHTML=T)},m(b,w){a(b,c,w)},p:xt,d(b){b&&t(c)}}}function rr(z){let c,T="Examples:",b,w,y;return w=new vt({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9JbWFnZVByb2Nlc3NvciUyQyUyMERldHJNb2RlbCUwQWZyb20lMjBQSUwlMjBpbXBvcnQlMjBJbWFnZSUwQWltcG9ydCUyMHJlcXVlc3RzJTBBJTBBdXJsJTIwJTNEJTIwJTIyaHR0cCUzQSUyRiUyRmltYWdlcy5jb2NvZGF0YXNldC5vcmclMkZ2YWwyMDE3JTJGMDAwMDAwMDM5NzY5LmpwZyUyMiUwQWltYWdlJTIwJTNEJTIwSW1hZ2Uub3BlbihyZXF1ZXN0cy5nZXQodXJsJTJDJTIwc3RyZWFtJTNEVHJ1ZSkucmF3KSUwQSUwQWltYWdlX3Byb2Nlc3NvciUyMCUzRCUyMEF1dG9JbWFnZVByb2Nlc3Nvci5mcm9tX3ByZXRyYWluZWQoJTIyZmFjZWJvb2slMkZkZXRyLXJlc25ldC01MCUyMiklMEFtb2RlbCUyMCUzRCUyMERldHJNb2RlbC5mcm9tX3ByZXRyYWluZWQoJTIyZmFjZWJvb2slMkZkZXRyLXJlc25ldC01MCUyMiklMEElMEElMjMlMjBwcmVwYXJlJTIwaW1hZ2UlMjBmb3IlMjB0aGUlMjBtb2RlbCUwQWlucHV0cyUyMCUzRCUyMGltYWdlX3Byb2Nlc3NvcihpbWFnZXMlM0RpbWFnZSUyQyUyMHJldHVybl90ZW5zb3JzJTNEJTIycHQlMjIpJTBBJTBBJTIzJTIwZm9yd2FyZCUyMHBhc3MlMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMpJTBBJTBBJTIzJTIwdGhlJTIwbGFzdCUyMGhpZGRlbiUyMHN0YXRlcyUyMGFyZSUyMHRoZSUyMGZpbmFsJTIwcXVlcnklMjBlbWJlZGRpbmdzJTIwb2YlMjB0aGUlMjBUcmFuc2Zvcm1lciUyMGRlY29kZXIlMEElMjMlMjB0aGVzZSUyMGFyZSUyMG9mJTIwc2hhcGUlMjAoYmF0Y2hfc2l6ZSUyQyUyMG51bV9xdWVyaWVzJTJDJTIwaGlkZGVuX3NpemUpJTBBbGFzdF9oaWRkZW5fc3RhdGVzJTIwJTNEJTIwb3V0cHV0cy5sYXN0X2hpZGRlbl9zdGF0ZSUwQWxpc3QobGFzdF9oaWRkZW5fc3RhdGVzLnNoYXBlKQ==",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoImageProcessor, DetrModel | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> requests | |
| <span class="hljs-meta">>>> </span>url = <span class="hljs-string">"http://images.cocodataset.org/val2017/000000039769.jpg"</span> | |
| <span class="hljs-meta">>>> </span>image = Image.<span class="hljs-built_in">open</span>(requests.get(url, stream=<span class="hljs-literal">True</span>).raw) | |
| <span class="hljs-meta">>>> </span>image_processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">"facebook/detr-resnet-50"</span>) | |
| <span class="hljs-meta">>>> </span>model = DetrModel.from_pretrained(<span class="hljs-string">"facebook/detr-resnet-50"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># prepare image for the model</span> | |
| <span class="hljs-meta">>>> </span>inputs = image_processor(images=image, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># forward pass</span> | |
| <span class="hljs-meta">>>> </span>outputs = model(**inputs) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># the last hidden states are the final query embeddings of the Transformer decoder</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># these are of shape (batch_size, num_queries, hidden_size)</span> | |
| <span class="hljs-meta">>>> </span>last_hidden_states = outputs.last_hidden_state | |
| <span class="hljs-meta">>>> </span><span class="hljs-built_in">list</span>(last_hidden_states.shape) | |
| [<span class="hljs-number">1</span>, <span class="hljs-number">100</span>, <span class="hljs-number">256</span>]`,wrap:!1}}),{c(){c=i("p"),c.textContent=T,b=n(),h(w.$$.fragment)},l(l){c=d(l,"P",{"data-svelte-h":!0}),m(c)!=="svelte-kvfsh7"&&(c.textContent=T),b=s(l),p(w.$$.fragment,l)},m(l,D){a(l,c,D),a(l,b,D),g(w,l,D),y=!0},p:xt,i(l){y||(u(w.$$.fragment,l),y=!0)},o(l){f(w.$$.fragment,l),y=!1},d(l){l&&(t(c),t(b)),_(w,l)}}}function ar(z){let c,T=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){c=i("p"),c.innerHTML=T},l(b){c=d(b,"P",{"data-svelte-h":!0}),m(c)!=="svelte-fincs2"&&(c.innerHTML=T)},m(b,w){a(b,c,w)},p:xt,d(b){b&&t(c)}}}function ir(z){let c,T="Examples:",b,w,y;return w=new vt({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9JbWFnZVByb2Nlc3NvciUyQyUyMERldHJGb3JPYmplY3REZXRlY3Rpb24lMEFpbXBvcnQlMjB0b3JjaCUwQWZyb20lMjBQSUwlMjBpbXBvcnQlMjBJbWFnZSUwQWltcG9ydCUyMHJlcXVlc3RzJTBBJTBBdXJsJTIwJTNEJTIwJTIyaHR0cCUzQSUyRiUyRmltYWdlcy5jb2NvZGF0YXNldC5vcmclMkZ2YWwyMDE3JTJGMDAwMDAwMDM5NzY5LmpwZyUyMiUwQWltYWdlJTIwJTNEJTIwSW1hZ2Uub3BlbihyZXF1ZXN0cy5nZXQodXJsJTJDJTIwc3RyZWFtJTNEVHJ1ZSkucmF3KSUwQSUwQWltYWdlX3Byb2Nlc3NvciUyMCUzRCUyMEF1dG9JbWFnZVByb2Nlc3Nvci5mcm9tX3ByZXRyYWluZWQoJTIyZmFjZWJvb2slMkZkZXRyLXJlc25ldC01MCUyMiklMEFtb2RlbCUyMCUzRCUyMERldHJGb3JPYmplY3REZXRlY3Rpb24uZnJvbV9wcmV0cmFpbmVkKCUyMmZhY2Vib29rJTJGZGV0ci1yZXNuZXQtNTAlMjIpJTBBJTBBaW5wdXRzJTIwJTNEJTIwaW1hZ2VfcHJvY2Vzc29yKGltYWdlcyUzRGltYWdlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEFvdXRwdXRzJTIwJTNEJTIwbW9kZWwoKippbnB1dHMpJTBBJTBBJTIzJTIwY29udmVydCUyMG91dHB1dHMlMjAoYm91bmRpbmclMjBib3hlcyUyMGFuZCUyMGNsYXNzJTIwbG9naXRzKSUyMHRvJTIwUGFzY2FsJTIwVk9DJTIwZm9ybWF0JTIwKHhtaW4lMkMlMjB5bWluJTJDJTIweG1heCUyQyUyMHltYXgpJTBBdGFyZ2V0X3NpemVzJTIwJTNEJTIwdG9yY2gudGVuc29yKCU1QmltYWdlLnNpemUlNUIlM0ElM0EtMSU1RCU1RCklMEFyZXN1bHRzJTIwJTNEJTIwaW1hZ2VfcHJvY2Vzc29yLnBvc3RfcHJvY2Vzc19vYmplY3RfZGV0ZWN0aW9uKG91dHB1dHMlMkMlMjB0aHJlc2hvbGQlM0QwLjklMkMlMjB0YXJnZXRfc2l6ZXMlM0R0YXJnZXRfc2l6ZXMpJTVCJTBBJTIwJTIwJTIwJTIwMCUwQSU1RCUwQSUwQWZvciUyMHNjb3JlJTJDJTIwbGFiZWwlMkMlMjBib3glMjBpbiUyMHppcChyZXN1bHRzJTVCJTIyc2NvcmVzJTIyJTVEJTJDJTIwcmVzdWx0cyU1QiUyMmxhYmVscyUyMiU1RCUyQyUyMHJlc3VsdHMlNUIlMjJib3hlcyUyMiU1RCklM0ElMEElMjAlMjAlMjAlMjBib3glMjAlM0QlMjAlNUJyb3VuZChpJTJDJTIwMiklMjBmb3IlMjBpJTIwaW4lMjBib3gudG9saXN0KCklNUQlMEElMjAlMjAlMjAlMjBwcmludCglMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmJTIyRGV0ZWN0ZWQlMjAlN0Jtb2RlbC5jb25maWcuaWQybGFiZWwlNUJsYWJlbC5pdGVtKCklNUQlN0QlMjB3aXRoJTIwY29uZmlkZW
5jZSUyMCUyMiUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGYlMjIlN0Jyb3VuZChzY29yZS5pdGVtKCklMkMlMjAzKSU3RCUyMGF0JTIwbG9jYXRpb24lMjAlN0Jib3glN0QlMjIlMEElMjAlMjAlMjAlMjAp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoImageProcessor, DetrForObjectDetection | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> requests | |
| <span class="hljs-meta">>>> </span>url = <span class="hljs-string">"http://images.cocodataset.org/val2017/000000039769.jpg"</span> | |
| <span class="hljs-meta">>>> </span>image = Image.<span class="hljs-built_in">open</span>(requests.get(url, stream=<span class="hljs-literal">True</span>).raw) | |
| <span class="hljs-meta">>>> </span>image_processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">"facebook/detr-resnet-50"</span>) | |
| <span class="hljs-meta">>>> </span>model = DetrForObjectDetection.from_pretrained(<span class="hljs-string">"facebook/detr-resnet-50"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = image_processor(images=image, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-meta">>>> </span>outputs = model(**inputs) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)</span> | |
| <span class="hljs-meta">>>> </span>target_sizes = torch.tensor([image.size[::-<span class="hljs-number">1</span>]]) | |
| <span class="hljs-meta">>>> </span>results = image_processor.post_process_object_detection(outputs, threshold=<span class="hljs-number">0.9</span>, target_sizes=target_sizes)[ | |
| <span class="hljs-meta">... </span> <span class="hljs-number">0</span> | |
| <span class="hljs-meta">... </span>] | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">for</span> score, label, box <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(results[<span class="hljs-string">"scores"</span>], results[<span class="hljs-string">"labels"</span>], results[<span class="hljs-string">"boxes"</span>]): | |
| <span class="hljs-meta">... </span> box = [<span class="hljs-built_in">round</span>(i, <span class="hljs-number">2</span>) <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> box.tolist()] | |
| <span class="hljs-meta">... </span> <span class="hljs-built_in">print</span>( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">f"Detected <span class="hljs-subst">{model.config.id2label[label.item()]}</span> with confidence "</span> | |
| <span class="hljs-meta">... </span> <span class="hljs-string">f"<span class="hljs-subst">{<span class="hljs-built_in">round</span>(score.item(), <span class="hljs-number">3</span>)}</span> at location <span class="hljs-subst">{box}</span>"</span> | |
| <span class="hljs-meta">... </span> ) | |
| Detected remote <span class="hljs-keyword">with</span> confidence <span class="hljs-number">0.998</span> at location [<span class="hljs-number">40.16</span>, <span class="hljs-number">70.81</span>, <span class="hljs-number">175.55</span>, <span class="hljs-number">117.98</span>] | |
| Detected remote <span class="hljs-keyword">with</span> confidence <span class="hljs-number">0.996</span> at location [<span class="hljs-number">333.24</span>, <span class="hljs-number">72.55</span>, <span class="hljs-number">368.33</span>, <span class="hljs-number">187.66</span>] | |
| Detected couch <span class="hljs-keyword">with</span> confidence <span class="hljs-number">0.995</span> at location [-<span class="hljs-number">0.02</span>, <span class="hljs-number">1.15</span>, <span class="hljs-number">639.73</span>, <span class="hljs-number">473.76</span>] | |
| Detected cat <span class="hljs-keyword">with</span> confidence <span class="hljs-number">0.999</span> at location [<span class="hljs-number">13.24</span>, <span class="hljs-number">52.05</span>, <span class="hljs-number">314.02</span>, <span class="hljs-number">470.93</span>] | |
| Detected cat <span class="hljs-keyword">with</span> confidence <span class="hljs-number">0.999</span> at location [<span class="hljs-number">345.4</span>, <span class="hljs-number">23.85</span>, <span class="hljs-number">640.37</span>, <span class="hljs-number">368.72</span>]`,wrap:!1}}),{c(){c=i("p"),c.textContent=T,b=n(),h(w.$$.fragment)},l(l){c=d(l,"P",{"data-svelte-h":!0}),m(c)!=="svelte-kvfsh7"&&(c.textContent=T),b=s(l),p(w.$$.fragment,l)},m(l,D){a(l,c,D),a(l,b,D),g(w,l,D),y=!0},p:xt,i(l){y||(u(w.$$.fragment,l),y=!0)},o(l){f(w.$$.fragment,l),y=!1},d(l){l&&(t(c),t(b)),_(w,l)}}}function dr(z){let c,T=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code> | |
| instance afterwards instead of this since the former takes care of running the pre and post processing steps while | |
| the latter silently ignores them.`;return{c(){c=i("p"),c.innerHTML=T},l(b){c=d(b,"P",{"data-svelte-h":!0}),m(c)!=="svelte-fincs2"&&(c.innerHTML=T)},m(b,w){a(b,c,w)},p:xt,d(b){b&&t(c)}}}function cr(z){let c,T="Examples:",b,w,y;return w=new vt({props:{code:"aW1wb3J0JTIwaW8lMEFpbXBvcnQlMjByZXF1ZXN0cyUwQWZyb20lMjBQSUwlMjBpbXBvcnQlMjBJbWFnZSUwQWltcG9ydCUyMHRvcmNoJTBBaW1wb3J0JTIwbnVtcHklMEElMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b0ltYWdlUHJvY2Vzc29yJTJDJTIwRGV0ckZvclNlZ21lbnRhdGlvbiUwQWZyb20lMjB0cmFuc2Zvcm1lcnMuaW1hZ2VfdHJhbnNmb3JtcyUyMGltcG9ydCUyMHJnYl90b19pZCUwQSUwQXVybCUyMCUzRCUyMCUyMmh0dHAlM0ElMkYlMkZpbWFnZXMuY29jb2RhdGFzZXQub3JnJTJGdmFsMjAxNyUyRjAwMDAwMDAzOTc2OS5qcGclMjIlMEFpbWFnZSUyMCUzRCUyMEltYWdlLm9wZW4ocmVxdWVzdHMuZ2V0KHVybCUyQyUyMHN0cmVhbSUzRFRydWUpLnJhdyklMEElMEFpbWFnZV9wcm9jZXNzb3IlMjAlM0QlMjBBdXRvSW1hZ2VQcm9jZXNzb3IuZnJvbV9wcmV0cmFpbmVkKCUyMmZhY2Vib29rJTJGZGV0ci1yZXNuZXQtNTAtcGFub3B0aWMlMjIpJTBBbW9kZWwlMjAlM0QlMjBEZXRyRm9yU2VnbWVudGF0aW9uLmZyb21fcHJldHJhaW5lZCglMjJmYWNlYm9vayUyRmRldHItcmVzbmV0LTUwLXBhbm9wdGljJTIyKSUwQSUwQSUyMyUyMHByZXBhcmUlMjBpbWFnZSUyMGZvciUyMHRoZSUyMG1vZGVsJTBBaW5wdXRzJTIwJTNEJTIwaW1hZ2VfcHJvY2Vzc29yKGltYWdlcyUzRGltYWdlJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEElMEElMjMlMjBmb3J3YXJkJTIwcGFzcyUwQW91dHB1dHMlMjAlM0QlMjBtb2RlbCgqKmlucHV0cyklMEElMEElMjMlMjBVc2UlMjB0aGUlMjAlNjBwb3N0X3Byb2Nlc3NfcGFub3B0aWNfc2VnbWVudGF0aW9uJTYwJTIwbWV0aG9kJTIwb2YlMjB0aGUlMjAlNjBpbWFnZV9wcm9jZXNzb3IlNjAlMjB0byUyMHJldHJpZXZlJTIwcG9zdC1wcm9jZXNzZWQlMjBwYW5vcHRpYyUyMHNlZ21lbnRhdGlvbiUyMG1hcHMlMEElMjMlMjBTZWdtZW50YXRpb24lMjByZXN1bHRzJTIwYXJlJTIwcmV0dXJuZWQlMjBhcyUyMGElMjBsaXN0JTIwb2YlMjBkaWN0aW9uYXJpZXMlMEFyZXN1bHQlMjAlM0QlMjBpbWFnZV9wcm9jZXNzb3IucG9zdF9wcm9jZXNzX3Bhbm9wdGljX3NlZ21lbnRhdGlvbihvdXRwdXRzJTJDJTIwdGFyZ2V0X3NpemVzJTNEJTVCKDMwMCUyQyUyMDUwMCklNUQpJTBBJTBBJTIzJTIwQSUyMHRlbnNvciUyMG9mJTIwc2hhcGUlMjAoaGVpZ2h0JTJDJTIwd2lkdGgpJTIwd2hlcmUlMjBlYWNoJTIwdmFsdWUlMjBkZW5vdGVzJTIwYSUyMHNlZ21lbnQlMjBpZCUyQyUyMGZpbGxlZCUyMHdpdGglMjAtMSUyMGlmJTIwbm8lMjBzZWdtZW
50JTIwaXMlMjBmb3VuZCUwQXBhbm9wdGljX3NlZyUyMCUzRCUyMHJlc3VsdCU1QjAlNUQlNUIlMjJzZWdtZW50YXRpb24lMjIlNUQlMEElMjMlMjBHZXQlMjBwcmVkaWN0aW9uJTIwc2NvcmUlMjBhbmQlMjBzZWdtZW50X2lkJTIwdG8lMjBjbGFzc19pZCUyMG1hcHBpbmclMjBvZiUyMGVhY2glMjBzZWdtZW50JTBBcGFub3B0aWNfc2VnbWVudHNfaW5mbyUyMCUzRCUyMHJlc3VsdCU1QjAlNUQlNUIlMjJzZWdtZW50c19pbmZvJTIyJTVE",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> io | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> requests | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoImageProcessor, DetrForSegmentation | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers.image_transforms <span class="hljs-keyword">import</span> rgb_to_id | |
| <span class="hljs-meta">>>> </span>url = <span class="hljs-string">"http://images.cocodataset.org/val2017/000000039769.jpg"</span> | |
| <span class="hljs-meta">>>> </span>image = Image.<span class="hljs-built_in">open</span>(requests.get(url, stream=<span class="hljs-literal">True</span>).raw) | |
| <span class="hljs-meta">>>> </span>image_processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">"facebook/detr-resnet-50-panoptic"</span>) | |
| <span class="hljs-meta">>>> </span>model = DetrForSegmentation.from_pretrained(<span class="hljs-string">"facebook/detr-resnet-50-panoptic"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># prepare image for the model</span> | |
| <span class="hljs-meta">>>> </span>inputs = image_processor(images=image, return_tensors=<span class="hljs-string">"pt"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># forward pass</span> | |
| <span class="hljs-meta">>>> </span>outputs = model(**inputs) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Use the \`post_process_panoptic_segmentation\` method of the \`image_processor\` to retrieve post-processed panoptic segmentation maps</span> | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Segmentation results are returned as a list of dictionaries</span> | |
| <span class="hljs-meta">>>> </span>result = image_processor.post_process_panoptic_segmentation(outputs, target_sizes=[(<span class="hljs-number">300</span>, <span class="hljs-number">500</span>)]) | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># A tensor of shape (height, width) where each value denotes a segment id, filled with -1 if no segment is found</span> | |
| <span class="hljs-meta">>>> </span>panoptic_seg = result[<span class="hljs-number">0</span>][<span class="hljs-string">"segmentation"</span>] | |
| <span class="hljs-meta">>>> </span><span class="hljs-comment"># Get prediction score and segment_id to class_id mapping of each segment</span> | |
| <span class="hljs-meta">>>> </span>panoptic_segments_info = result[<span class="hljs-number">0</span>][<span class="hljs-string">"segments_info"</span>]`,wrap:!1}}),{c(){c=i("p"),c.textContent=T,b=n(),h(w.$$.fragment)},l(l){c=d(l,"P",{"data-svelte-h":!0}),m(c)!=="svelte-kvfsh7"&&(c.textContent=T),b=s(l),p(w.$$.fragment,l)},m(l,D){a(l,c,D),a(l,b,D),g(w,l,D),y=!0},p:xt,i(l){y||(u(w.$$.fragment,l),y=!0)},o(l){f(w.$$.fragment,l),y=!1},d(l){l&&(t(c),t(b)),_(w,l)}}}function lr(z){let c,T,b,w,y,l,D,eo,pe,ts=`The DETR model was proposed in <a href="https://arxiv.org/abs/2005.12872" rel="nofollow">End-to-End Object Detection with Transformers</a> by | |
| Nicolas Carion, Francisco Massa, Gabriel Synnaeve, Nicolas Usunier, Alexander Kirillov and Sergey Zagoruyko. DETR | |
| consists of a convolutional backbone followed by an encoder-decoder Transformer which can be trained end-to-end for | |
| object detection. It greatly simplifies a lot of the complexity of models like Faster-R-CNN and Mask-R-CNN, which use | |
| things like region proposals, non-maximum suppression procedure and anchor generation. Moreover, DETR can also be | |
| naturally extended to perform panoptic segmentation, by simply adding a mask head on top of the decoder outputs.`,to,ge,os="The abstract from the paper is the following:",oo,ue,ns=`<em>We present a new method that views object detection as a direct set prediction problem. Our approach streamlines the | |
| detection pipeline, effectively removing the need for many hand-designed components like a non-maximum suppression | |
| procedure or anchor generation that explicitly encode our prior knowledge about the task. The main ingredients of the | |
| new framework, called DEtection TRansformer or DETR, are a set-based global loss that forces unique predictions via | |
| bipartite matching, and a transformer encoder-decoder architecture. Given a fixed small set of learned object queries, | |
| DETR reasons about the relations of the objects and the global image context to directly output the final set of | |
| predictions in parallel. The new model is conceptually simple and does not require a specialized library, unlike many | |
| other modern detectors. DETR demonstrates accuracy and run-time performance on par with the well-established and | |
| highly-optimized Faster RCNN baseline on the challenging COCO object detection dataset. Moreover, DETR can be easily | |
| generalized to produce panoptic segmentation in a unified manner. We show that it significantly outperforms competitive | |
| baselines.</em>`,no,fe,ss='This model was contributed by <a href="https://huggingface.co/nielsr" rel="nofollow">nielsr</a>. The original code can be found <a href="https://github.com/facebookresearch/detr" rel="nofollow">here</a>.',so,_e,ro,be,rs='Here’s a TLDR explaining how <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForObjectDetection">DetrForObjectDetection</a> works:',ao,we,as=`First, an image is sent through a pre-trained convolutional backbone (in the paper, the authors use | |
| ResNet-50/ResNet-101). Let’s assume we also add a batch dimension. This means that the input to the backbone is a | |
| tensor of shape <code>(batch_size, 3, height, width)</code>, assuming the image has 3 color channels (RGB). The CNN backbone | |
| outputs a new lower-resolution feature map, typically of shape <code>(batch_size, 2048, height/32, width/32)</code>. This is | |
| then projected to match the hidden dimension of the Transformer of DETR, which is <code>256</code> by default, using a | |
| <code>nn.Conv2d</code> layer. So now, we have a tensor of shape <code>(batch_size, 256, height/32, width/32)</code>. Next, the | |
| feature map is flattened and transposed to obtain a tensor of shape <code>(batch_size, seq_len, d_model)</code> = | |
| <code>(batch_size, width/32*height/32, 256)</code>. So a difference with NLP models is that the sequence length is actually | |
| longer than usual, but with a smaller <code>d_model</code> (which in NLP is typically 768 or higher).`,io,ye,is=`Next, this is sent through the encoder, outputting <code>encoder_hidden_states</code> of the same shape (you can consider | |
| these as image features). Next, so-called <strong>object queries</strong> are sent through the decoder. This is a tensor of shape | |
| <code>(batch_size, num_queries, d_model)</code>, with <code>num_queries</code> typically set to 100 and initialized with zeros. | |
| These input embeddings are learnt positional encodings that the authors refer to as object queries, and similarly to | |
| the encoder, they are added to the input of each attention layer. Each object query will look for a particular object | |
| in the image. The decoder updates these embeddings through multiple self-attention and encoder-decoder attention layers | |
| to output <code>decoder_hidden_states</code> of the same shape: <code>(batch_size, num_queries, d_model)</code>. Next, two heads | |
| are added on top for object detection: a linear layer for classifying each object query into one of the objects or “no | |
| object”, and an MLP to predict bounding boxes for each query.`,co,Te,ds=`The model is trained using a <strong>bipartite matching loss</strong>: so what we actually do is compare the predicted classes + | |
| bounding boxes of each of the N = 100 object queries to the ground truth annotations, padded up to the same length N | |
| (so if an image only contains 4 objects, 96 annotations will just have a “no object” as class and “no bounding box” as | |
| bounding box). The <a href="https://en.wikipedia.org/wiki/Hungarian_algorithm" rel="nofollow">Hungarian matching algorithm</a> is used to find | |
| an optimal one-to-one mapping of each of the N queries to each of the N annotations. Next, standard cross-entropy (for | |
| the classes) and a linear combination of the L1 and <a href="https://giou.stanford.edu/" rel="nofollow">generalized IoU loss</a> (for the | |
| bounding boxes) are used to optimize the parameters of the model.`,lo,ve,cs=`DETR can be naturally extended to perform panoptic segmentation (which unifies semantic segmentation and instance | |
| segmentation). <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a> adds a segmentation mask head on top of | |
| <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForObjectDetection">DetrForObjectDetection</a>. The mask head can be trained either jointly, or in a two-step process, | |
| where one first trains a <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForObjectDetection">DetrForObjectDetection</a> model to detect bounding boxes around both | |
| “things” (instances) and “stuff” (background things like trees, roads, sky), then freezes all the weights and trains only | |
| the mask head for 25 epochs. Experimentally, these two approaches give similar results. Note that predicting boxes is | |
| required for the training to be possible, since the Hungarian matching is computed using distances between boxes.`,mo,xe,ho,De,ls=`<li>DETR uses so-called <strong>object queries</strong> to detect objects in an image. The number of queries determines the maximum | |
| number of objects that can be detected in a single image, and is set to 100 by default (see parameter | |
| <code>num_queries</code> of <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrConfig">DetrConfig</a>). Note that it’s good to have some slack (in COCO, the | |
| authors used 100, while the maximum number of objects in a COCO image is ~70).</li> <li>The decoder of DETR updates the query embeddings in parallel. This is different from language models like GPT-2, | |
| which use autoregressive decoding instead of parallel decoding. Hence, no causal attention mask is used.</li> <li>DETR adds position embeddings to the hidden states at each self-attention and cross-attention layer before projecting | |
| to queries and keys. For the position embeddings of the image, one can choose between fixed sinusoidal or learned | |
| absolute position embeddings. By default, the parameter <code>position_embedding_type</code> of | |
| <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrConfig">DetrConfig</a> is set to <code>"sine"</code>.</li> <li>During training, the authors of DETR found it helpful to use auxiliary losses in the decoder, especially to help | |
| the model output the correct number of objects of each class. If you set the parameter <code>auxiliary_loss</code> of | |
| <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrConfig">DetrConfig</a> to <code>True</code>, then prediction feedforward neural networks and Hungarian losses | |
| are added after each decoder layer (with the FFNs sharing parameters).</li> <li>If you want to train the model in a distributed environment across multiple nodes, then one should update the | |
| <em>num_boxes</em> variable in the <em>DetrLoss</em> class of <em>modeling_detr.py</em>. When training on multiple nodes, this should be | |
| set to the average number of target boxes across all nodes, as can be seen in the original implementation <a href="https://github.com/facebookresearch/detr/blob/a54b77800eb8e64e3ad0d8237789fcbf2f8350c5/models/detr.py#L227-L232" rel="nofollow">here</a>.</li> <li><a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForObjectDetection">DetrForObjectDetection</a> and <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a> can be initialized with | |
| any convolutional backbone available in the <a href="https://github.com/rwightman/pytorch-image-models" rel="nofollow">timm library</a>. | |
| Initializing with a MobileNet backbone for example can be done by setting the <code>backbone</code> attribute of | |
| <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrConfig">DetrConfig</a> to <code>"tf_mobilenetv3_small_075"</code>, and then initializing the model with that | |
| config.</li> <li>DETR resizes the input images such that the shortest side is at least a certain number of pixels while the longest side is | |
| at most 1333 pixels. At training time, scale augmentation is used such that the shortest side is randomly set to at | |
| least 480 and at most 800 pixels. At inference time, the shortest side is set to 800. One can use | |
| <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor">DetrImageProcessor</a> to prepare images (and optional annotations in COCO format) for the | |
| model. Due to this resizing, images in a batch can have different sizes. DETR solves this by padding images up to the | |
| largest size in a batch, and by creating a pixel mask that indicates which pixels are real/which are padding. | |
| Alternatively, one can also define a custom <code>collate_fn</code> in order to batch images together, using | |
| <code>~transformers.DetrImageProcessor.pad_and_create_pixel_mask</code>.</li> <li>The size of the images will determine the amount of memory being used, and will thus determine the <code>batch_size</code>. | |
| It is advised to use a batch size of 2 per GPU. See <a href="https://github.com/facebookresearch/detr/issues/150" rel="nofollow">this Github thread</a> for more info.</li>`,po,Me,ms="There are three ways to instantiate a DETR model (depending on what you prefer):",go,je,hs="Option 1: Instantiate DETR with pre-trained weights for entire model",uo,$e,fo,ke,ps="Option 2: Instantiate DETR with randomly initialized weights for Transformer, but pre-trained weights for backbone",_o,ze,bo,Ce,gs="Option 3: Instantiate DETR with randomly initialized weights for backbone + Transformer",wo,Fe,yo,Ie,us="As a summary, consider the following table:",To,Je,fs='<thead><tr><th>Task</th> <th>Object detection</th> <th>Instance segmentation</th> <th>Panoptic segmentation</th></tr></thead> <tbody><tr><td><strong>Description</strong></td> <td>Predicting bounding boxes and class labels around objects in an image</td> <td>Predicting masks around objects (i.e. instances) in an image</td> <td>Predicting masks around both objects (i.e. instances) as well as “stuff” (i.e. 
background things like trees and roads) in an image</td></tr> <tr><td><strong>Model</strong></td> <td><a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForObjectDetection">DetrForObjectDetection</a></td> <td><a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a></td> <td><a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a></td></tr> <tr><td><strong>Example dataset</strong></td> <td>COCO detection</td> <td>COCO detection, COCO panoptic</td> <td>COCO panoptic</td></tr> <tr><td><strong>Format of annotations to provide to</strong> <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor">DetrImageProcessor</a></td> <td>{‘image_id’: <code>int</code>, ‘annotations’: <code>List[Dict]</code>} each Dict being a COCO object annotation</td> <td>{‘image_id’: <code>int</code>, ‘annotations’: <code>List[Dict]</code>} (in case of COCO detection) or {‘file_name’: <code>str</code>, ‘image_id’: <code>int</code>, ‘segments_info’: <code>List[Dict]</code>} (in case of COCO panoptic)</td> <td>{‘file_name’: <code>str</code>, ‘image_id’: <code>int</code>, ‘segments_info’: <code>List[Dict]</code>} and masks_path (path to directory containing PNG files of the masks)</td></tr> <tr><td><strong>Postprocessing</strong> (i.e. 
converting the output of the model to Pascal VOC format)</td> <td><code>post_process()</code></td> <td><code>post_process_segmentation()</code></td> <td><code>post_process_segmentation()</code>, <code>post_process_panoptic()</code></td></tr> <tr><td><strong>evaluators</strong></td> <td><code>CocoEvaluator</code> with <code>iou_types="bbox"</code></td> <td><code>CocoEvaluator</code> with <code>iou_types="bbox"</code> or <code>"segm"</code></td> <td><code>CocoEvaluator</code> with <code>iou_types="bbox"</code> or <code>"segm"</code>, <code>PanopticEvaluator</code></td></tr></tbody>',vo,qe,_s=`In short, one should prepare the data either in COCO detection or COCO panoptic format, then use | |
| <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor">DetrImageProcessor</a> to create <code>pixel_values</code>, <code>pixel_mask</code> and optional | |
| <code>labels</code>, which can then be used to train (or fine-tune) a model. For evaluation, one should first convert the | |
| outputs of the model using one of the postprocessing methods of <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor">DetrImageProcessor</a>. These can | |
| be provided to either <code>CocoEvaluator</code> or <code>PanopticEvaluator</code>, which allow you to calculate metrics like | |
| mean Average Precision (mAP) and Panoptic Quality (PQ). The latter objects are implemented in the <a href="https://github.com/facebookresearch/detr" rel="nofollow">original repository</a>. See the <a href="https://github.com/NielsRogge/Transformers-Tutorials/tree/master/DETR" rel="nofollow">example notebooks</a> for more info regarding evaluation.`,xo,Ne,Do,Oe,bs="A list of official Hugging Face and community (indicated by 🌎) resources to help you get started with DETR.",Mo,Ue,jo,Ee,ws='<li>All example notebooks illustrating fine-tuning <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForObjectDetection">DetrForObjectDetection</a> and <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a> on a custom dataset can be found <a href="https://github.com/NielsRogge/Transformers-Tutorials/tree/master/DETR" rel="nofollow">here</a>.</li> <li>Scripts for finetuning <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForObjectDetection">DetrForObjectDetection</a> with <a href="/docs/transformers/main/en/main_classes/trainer#transformers.Trainer">Trainer</a> or <a href="https://huggingface.co/docs/accelerate/index" rel="nofollow">Accelerate</a> can be found <a href="https://github.com/huggingface/transformers/tree/main/examples/pytorch/object-detection" rel="nofollow">here</a>.</li> <li>See also: <a href="../tasks/object_detection">Object detection task guide</a>.</li>',$o,Pe,ys="If you’re interested in submitting a resource to be included here, please feel free to open a Pull Request and we’ll review it! The resource should ideally demonstrate something new instead of duplicating an existing resource.",ko,Ze,zo,C,Re,nn,Dt,Ts=`This is the configuration class to store the configuration of a <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrModel">DetrModel</a>. It is used to instantiate a DETR | |
| model according to the specified arguments, defining the model architecture. Instantiating a configuration with the | |
| defaults will yield a similar configuration to that of the DETR | |
| <a href="https://huggingface.co/facebook/detr-resnet-50" rel="nofollow">facebook/detr-resnet-50</a> architecture.`,sn,Mt,vs=`Configuration objects inherit from <a href="/docs/transformers/main/en/main_classes/configuration#transformers.PretrainedConfig">PretrainedConfig</a> and can be used to control the model outputs. Read the | |
| documentation from <a href="/docs/transformers/main/en/main_classes/configuration#transformers.PretrainedConfig">PretrainedConfig</a> for more information.`,rn,B,an,G,We,dn,jt,xs='Instantiate a <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrConfig">DetrConfig</a> (or a derived class) from a pre-trained backbone model configuration.',Co,Le,Fo,j,Se,cn,$t,Ds="Constructs a Detr image processor.",ln,A,He,mn,kt,Ms="Preprocess an image or a batch of images so that it can be used by the model.",hn,V,Be,pn,zt,js=`Converts the raw output of <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForObjectDetection">DetrForObjectDetection</a> into final bounding boxes in (top_left_x, top_left_y, | |
| bottom_right_x, bottom_right_y) format. Only supports PyTorch.`,gn,X,Ge,un,Ct,$s='Converts the output of <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a> into semantic segmentation maps. Only supports PyTorch.',fn,Y,Ae,_n,Ft,ks='Converts the output of <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a> into instance segmentation predictions. Only supports PyTorch.',bn,Q,Ve,wn,It,zs=`Converts the output of <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a> into image panoptic segmentation predictions. Only supports | |
| PyTorch.`,Io,Xe,Jo,$,Ye,yn,K,Qe,Tn,Jt,Cs="Preprocess an image or a batch of images.",vn,ee,Ke,xn,qt,Fs=`Converts the raw output of <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForObjectDetection">DetrForObjectDetection</a> into final bounding boxes in (top_left_x, top_left_y, | |
| bottom_right_x, bottom_right_y) format. Only supports PyTorch.`,Dn,te,et,Mn,Nt,Is='Converts the output of <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a> into semantic segmentation maps. Only supports PyTorch.',jn,oe,tt,$n,Ot,Js='Converts the output of <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a> into instance segmentation predictions. Only supports PyTorch.',kn,ne,ot,zn,Ut,qs=`Converts the output of <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a> into image panoptic segmentation predictions. Only supports | |
| PyTorch.`,qo,nt,No,L,st,Cn,Et,Ns=`Base class for outputs of the DETR encoder-decoder model. This class adds one attribute to Seq2SeqModelOutput, | |
| namely an optional stack of intermediate decoder activations, i.e. the output of each decoder layer, each of them | |
| gone through a layernorm. This is useful when training the model with auxiliary decoding losses.`,Oo,S,rt,Fn,Pt,Os='Output type of <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForObjectDetection">DetrForObjectDetection</a>.',Uo,H,at,In,Zt,Us='Output type of <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a>.',Eo,it,Po,F,dt,Jn,Rt,Es=`The bare DETR Model (consisting of a backbone and encoder-decoder Transformer) outputting raw hidden-states without | |
| any specific head on top.`,qn,Wt,Ps=`This model inherits from <a href="/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.)`,Nn,Lt,Zs=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass. | |
| Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage | |
| and behavior.`,On,N,ct,Un,St,Rs='The <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrModel">DetrModel</a> forward method, overrides the <code>__call__</code> special method.',En,se,Pn,re,Zo,lt,Ro,I,mt,Zn,Ht,Ws=`DETR Model (consisting of a backbone and encoder-decoder Transformer) with object detection heads on top, for tasks | |
| such as COCO detection.`,Rn,Bt,Ls=`This model inherits from <a href="/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.)`,Wn,Gt,Ss=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass. | |
| Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage | |
| and behavior.`,Ln,O,ht,Sn,At,Hs='The <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForObjectDetection">DetrForObjectDetection</a> forward method, overrides the <code>__call__</code> special method.',Hn,ae,Bn,ie,Wo,pt,Lo,J,gt,Gn,Vt,Bs=`DETR Model (consisting of a backbone and encoder-decoder Transformer) with a segmentation head on top, for tasks | |
| such as COCO panoptic.`,An,Xt,Gs=`This model inherits from <a href="/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel">PreTrainedModel</a>. Check the superclass documentation for the generic methods the | |
| library implements for all its models (such as downloading or saving, resizing the input embeddings, pruning heads | |
| etc.)`,Vn,Yt,As=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass. | |
| Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage | |
| and behavior.`,Xn,U,ut,Yn,Qt,Vs='The <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a> forward method, overrides the <code>__call__</code> special method.',Qn,de,Kn,ce,So,ft,Ho,Kt,Bo;return y=new P({props:{title:"DETR",local:"detr",headingTag:"h1"}}),D=new P({props:{title:"Overview",local:"overview",headingTag:"h2"}}),_e=new P({props:{title:"How DETR works",local:"how-detr-works",headingTag:"h2"}}),xe=new P({props:{title:"Usage tips",local:"usage-tips",headingTag:"h2"}}),$e=new vt({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERldHJGb3JPYmplY3REZXRlY3Rpb24lMEElMEFtb2RlbCUyMCUzRCUyMERldHJGb3JPYmplY3REZXRlY3Rpb24uZnJvbV9wcmV0cmFpbmVkKCUyMmZhY2Vib29rJTJGZGV0ci1yZXNuZXQtNTAlMjIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DetrForObjectDetection | |
| <span class="hljs-meta">>>> </span>model = DetrForObjectDetection.from_pretrained(<span class="hljs-string">"facebook/detr-resnet-50"</span>)`,wrap:!1}}),ze=new vt({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERldHJDb25maWclMkMlMjBEZXRyRm9yT2JqZWN0RGV0ZWN0aW9uJTBBJTBBY29uZmlnJTIwJTNEJTIwRGV0ckNvbmZpZygpJTBBbW9kZWwlMjAlM0QlMjBEZXRyRm9yT2JqZWN0RGV0ZWN0aW9uKGNvbmZpZyk=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DetrConfig, DetrForObjectDetection | |
| <span class="hljs-meta">>>> </span>config = DetrConfig() | |
| <span class="hljs-meta">>>> </span>model = DetrForObjectDetection(config)`,wrap:!1}}),Fe=new vt({props:{code:"Y29uZmlnJTIwJTNEJTIwRGV0ckNvbmZpZyh1c2VfcHJldHJhaW5lZF9iYWNrYm9uZSUzREZhbHNlKSUwQW1vZGVsJTIwJTNEJTIwRGV0ckZvck9iamVjdERldGVjdGlvbihjb25maWcp",highlighted:`<span class="hljs-meta">>>> </span>config = DetrConfig(use_pretrained_backbone=<span class="hljs-literal">False</span>) | |
| <span class="hljs-meta">>>> </span>model = DetrForObjectDetection(config)`,wrap:!1}}),Ne=new P({props:{title:"Resources",local:"resources",headingTag:"h2"}}),Ue=new tr({props:{pipeline:"object-detection"}}),Ze=new P({props:{title:"DetrConfig",local:"transformers.DetrConfig",headingTag:"h2"}}),Re=new M({props:{name:"class transformers.DetrConfig",anchor:"transformers.DetrConfig",parameters:[{name:"use_timm_backbone",val:" = True"},{name:"backbone_config",val:" = None"},{name:"num_channels",val:" = 3"},{name:"num_queries",val:" = 100"},{name:"encoder_layers",val:" = 6"},{name:"encoder_ffn_dim",val:" = 2048"},{name:"encoder_attention_heads",val:" = 8"},{name:"decoder_layers",val:" = 6"},{name:"decoder_ffn_dim",val:" = 2048"},{name:"decoder_attention_heads",val:" = 8"},{name:"encoder_layerdrop",val:" = 0.0"},{name:"decoder_layerdrop",val:" = 0.0"},{name:"is_encoder_decoder",val:" = True"},{name:"activation_function",val:" = 'relu'"},{name:"d_model",val:" = 256"},{name:"dropout",val:" = 0.1"},{name:"attention_dropout",val:" = 0.0"},{name:"activation_dropout",val:" = 0.0"},{name:"init_std",val:" = 0.02"},{name:"init_xavier_std",val:" = 1.0"},{name:"auxiliary_loss",val:" = False"},{name:"position_embedding_type",val:" = 'sine'"},{name:"backbone",val:" = 'resnet50'"},{name:"use_pretrained_backbone",val:" = True"},{name:"backbone_kwargs",val:" = None"},{name:"dilation",val:" = False"},{name:"class_cost",val:" = 1"},{name:"bbox_cost",val:" = 5"},{name:"giou_cost",val:" = 2"},{name:"mask_loss_coefficient",val:" = 1"},{name:"dice_loss_coefficient",val:" = 1"},{name:"bbox_loss_coefficient",val:" = 5"},{name:"giou_loss_coefficient",val:" = 2"},{name:"eos_coefficient",val:" = 0.1"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.DetrConfig.use_timm_backbone",description:`<strong>use_timm_backbone</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether or not to use the <code>timm</code> library for the backbone. If set to <code>False</code>, will use the <a href="/docs/transformers/main/en/main_classes/backbones#transformers.AutoBackbone">AutoBackbone</a> | |
| API.`,name:"use_timm_backbone"},{anchor:"transformers.DetrConfig.backbone_config",description:`<strong>backbone_config</strong> (<code>PretrainedConfig</code> or <code>dict</code>, <em>optional</em>) — | |
| The configuration of the backbone model. Only used in case <code>use_timm_backbone</code> is set to <code>False</code> in which | |
| case it will default to <code>ResNetConfig()</code>.`,name:"backbone_config"},{anchor:"transformers.DetrConfig.num_channels",description:`<strong>num_channels</strong> (<code>int</code>, <em>optional</em>, defaults to 3) — | |
| The number of input channels.`,name:"num_channels"},{anchor:"transformers.DetrConfig.num_queries",description:`<strong>num_queries</strong> (<code>int</code>, <em>optional</em>, defaults to 100) — | |
| Number of object queries, i.e. detection slots. This is the maximal number of objects <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrModel">DetrModel</a> can | |
| detect in a single image. For COCO, we recommend 100 queries.`,name:"num_queries"},{anchor:"transformers.DetrConfig.d_model",description:`<strong>d_model</strong> (<code>int</code>, <em>optional</em>, defaults to 256) — | |
| Dimension of the layers.`,name:"d_model"},{anchor:"transformers.DetrConfig.encoder_layers",description:`<strong>encoder_layers</strong> (<code>int</code>, <em>optional</em>, defaults to 6) — | |
| Number of encoder layers.`,name:"encoder_layers"},{anchor:"transformers.DetrConfig.decoder_layers",description:`<strong>decoder_layers</strong> (<code>int</code>, <em>optional</em>, defaults to 6) — | |
| Number of decoder layers.`,name:"decoder_layers"},{anchor:"transformers.DetrConfig.encoder_attention_heads",description:`<strong>encoder_attention_heads</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — | |
| Number of attention heads for each attention layer in the Transformer encoder.`,name:"encoder_attention_heads"},{anchor:"transformers.DetrConfig.decoder_attention_heads",description:`<strong>decoder_attention_heads</strong> (<code>int</code>, <em>optional</em>, defaults to 8) — | |
| Number of attention heads for each attention layer in the Transformer decoder.`,name:"decoder_attention_heads"},{anchor:"transformers.DetrConfig.decoder_ffn_dim",description:`<strong>decoder_ffn_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 2048) — | |
| Dimension of the “intermediate” (often named feed-forward) layer in decoder.`,name:"decoder_ffn_dim"},{anchor:"transformers.DetrConfig.encoder_ffn_dim",description:`<strong>encoder_ffn_dim</strong> (<code>int</code>, <em>optional</em>, defaults to 2048) — | |
| Dimension of the “intermediate” (often named feed-forward) layer in encoder.`,name:"encoder_ffn_dim"},{anchor:"transformers.DetrConfig.activation_function",description:`<strong>activation_function</strong> (<code>str</code> or <code>function</code>, <em>optional</em>, defaults to <code>"relu"</code>) — | |
| The non-linear activation function (function or string) in the encoder and pooler. If string, <code>"gelu"</code>, | |
| <code>"relu"</code>, <code>"silu"</code> and <code>"gelu_new"</code> are supported.`,name:"activation_function"},{anchor:"transformers.DetrConfig.dropout",description:`<strong>dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) — | |
| The dropout probability for all fully connected layers in the embeddings, encoder, and pooler.`,name:"dropout"},{anchor:"transformers.DetrConfig.attention_dropout",description:`<strong>attention_dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| The dropout ratio for the attention probabilities.`,name:"attention_dropout"},{anchor:"transformers.DetrConfig.activation_dropout",description:`<strong>activation_dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| The dropout ratio for activations inside the fully connected layer.`,name:"activation_dropout"},{anchor:"transformers.DetrConfig.init_std",description:`<strong>init_std</strong> (<code>float</code>, <em>optional</em>, defaults to 0.02) — | |
| The standard deviation of the truncated_normal_initializer for initializing all weight matrices.`,name:"init_std"},{anchor:"transformers.DetrConfig.init_xavier_std",description:`<strong>init_xavier_std</strong> (<code>float</code>, <em>optional</em>, defaults to 1) — | |
| The scaling factor used for the Xavier initialization gain in the HM Attention map module.`,name:"init_xavier_std"},{anchor:"transformers.DetrConfig.encoder_layerdrop",description:`<strong>encoder_layerdrop</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| The LayerDrop probability for the encoder. See the <a href="https://arxiv.org/abs/1909.11556" rel="nofollow">LayerDrop paper</a> | |
| for more details.`,name:"encoder_layerdrop"},{anchor:"transformers.DetrConfig.decoder_layerdrop",description:`<strong>decoder_layerdrop</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) — | |
| The LayerDrop probability for the decoder. See the <a href="https://arxiv.org/abs/1909.11556" rel="nofollow">LayerDrop paper</a> | |
| for more details.`,name:"decoder_layerdrop"},{anchor:"transformers.DetrConfig.auxiliary_loss",description:`<strong>auxiliary_loss</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether auxiliary decoding losses (loss at each decoder layer) are to be used.`,name:"auxiliary_loss"},{anchor:"transformers.DetrConfig.position_embedding_type",description:`<strong>position_embedding_type</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"sine"</code>) — | |
| Type of position embeddings to be used on top of the image features. One of <code>"sine"</code> or <code>"learned"</code>.`,name:"position_embedding_type"},{anchor:"transformers.DetrConfig.backbone",description:`<strong>backbone</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"resnet50"</code>) — | |
| Name of backbone to use when <code>backbone_config</code> is <code>None</code>. If <code>use_pretrained_backbone</code> is <code>True</code>, this | |
| will load the corresponding pretrained weights from the timm or transformers library. If <code>use_pretrained_backbone</code> | |
| is <code>False</code>, this loads the backbone’s config and uses that to initialize the backbone with random weights.`,name:"backbone"},{anchor:"transformers.DetrConfig.use_pretrained_backbone",description:`<strong>use_pretrained_backbone</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Whether to use pretrained weights for the backbone.`,name:"use_pretrained_backbone"},{anchor:"transformers.DetrConfig.backbone_kwargs",description:`<strong>backbone_kwargs</strong> (<code>dict</code>, <em>optional</em>) — | |
| Keyword arguments to be passed to AutoBackbone when loading from a checkpoint | |
| e.g. <code>{'out_indices': (0, 1, 2, 3)}</code>. Cannot be specified if <code>backbone_config</code> is set.`,name:"backbone_kwargs"},{anchor:"transformers.DetrConfig.dilation",description:`<strong>dilation</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) — | |
| Whether to replace stride with dilation in the last convolutional block (DC5). Only supported when | |
| <code>use_timm_backbone</code> = <code>True</code>.`,name:"dilation"},{anchor:"transformers.DetrConfig.class_cost",description:`<strong>class_cost</strong> (<code>float</code>, <em>optional</em>, defaults to 1) — | |
| Relative weight of the classification error in the Hungarian matching cost.`,name:"class_cost"},{anchor:"transformers.DetrConfig.bbox_cost",description:`<strong>bbox_cost</strong> (<code>float</code>, <em>optional</em>, defaults to 5) — | |
| Relative weight of the L1 error of the bounding box coordinates in the Hungarian matching cost.`,name:"bbox_cost"},{anchor:"transformers.DetrConfig.giou_cost",description:`<strong>giou_cost</strong> (<code>float</code>, <em>optional</em>, defaults to 2) — | |
| Relative weight of the generalized IoU loss of the bounding box in the Hungarian matching cost.`,name:"giou_cost"},{anchor:"transformers.DetrConfig.mask_loss_coefficient",description:`<strong>mask_loss_coefficient</strong> (<code>float</code>, <em>optional</em>, defaults to 1) — | |
| Relative weight of the Focal loss in the panoptic segmentation loss.`,name:"mask_loss_coefficient"},{anchor:"transformers.DetrConfig.dice_loss_coefficient",description:`<strong>dice_loss_coefficient</strong> (<code>float</code>, <em>optional</em>, defaults to 1) — | |
| Relative weight of the DICE/F-1 loss in the panoptic segmentation loss.`,name:"dice_loss_coefficient"},{anchor:"transformers.DetrConfig.bbox_loss_coefficient",description:`<strong>bbox_loss_coefficient</strong> (<code>float</code>, <em>optional</em>, defaults to 5) — | |
| Relative weight of the L1 bounding box loss in the object detection loss.`,name:"bbox_loss_coefficient"},{anchor:"transformers.DetrConfig.giou_loss_coefficient",description:`<strong>giou_loss_coefficient</strong> (<code>float</code>, <em>optional</em>, defaults to 2) — | |
| Relative weight of the generalized IoU loss in the object detection loss.`,name:"giou_loss_coefficient"},{anchor:"transformers.DetrConfig.eos_coefficient",description:`<strong>eos_coefficient</strong> (<code>float</code>, <em>optional</em>, defaults to 0.1) — | |
| Relative classification weight of the ‘no-object’ class in the object detection loss.`,name:"eos_coefficient"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/configuration_detr.py#L32"}}),B=new on({props:{anchor:"transformers.DetrConfig.example",$$slots:{default:[nr]},$$scope:{ctx:z}}}),We=new M({props:{name:"from_backbone_config",anchor:"transformers.DetrConfig.from_backbone_config",parameters:[{name:"backbone_config",val:": PretrainedConfig"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.DetrConfig.from_backbone_config.backbone_config",description:`<strong>backbone_config</strong> (<a href="/docs/transformers/main/en/main_classes/configuration#transformers.PretrainedConfig">PretrainedConfig</a>) — | |
| The backbone configuration.`,name:"backbone_config"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/configuration_detr.py#L255",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>An instance of a configuration object</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.DetrConfig" | |
| >DetrConfig</a></p> | |
| `}}),Le=new P({props:{title:"DetrImageProcessor",local:"transformers.DetrImageProcessor",headingTag:"h2"}}),Se=new M({props:{name:"class transformers.DetrImageProcessor",anchor:"transformers.DetrImageProcessor",parameters:[{name:"format",val:": Union = <AnnotationFormat.COCO_DETECTION: 'coco_detection'>"},{name:"do_resize",val:": bool = True"},{name:"size",val:": Dict = None"},{name:"resample",val:": Resampling = <Resampling.BILINEAR: 2>"},{name:"do_rescale",val:": bool = True"},{name:"rescale_factor",val:": Union = 0.00392156862745098"},{name:"do_normalize",val:": bool = True"},{name:"image_mean",val:": Union = None"},{name:"image_std",val:": Union = None"},{name:"do_convert_annotations",val:": Optional = None"},{name:"do_pad",val:": bool = True"},{name:"pad_size",val:": Optional = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.DetrImageProcessor.format",description:`<strong>format</strong> (<code>str</code>, <em>optional</em>, defaults to <code>"coco_detection"</code>) — | |
| Data format of the annotations. One of “coco_detection” or “coco_panoptic”.`,name:"format"},{anchor:"transformers.DetrImageProcessor.do_resize",description:`<strong>do_resize</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Controls whether to resize the image’s <code>(height, width)</code> dimensions to the specified <code>size</code>. Can be | |
| overridden by the <code>do_resize</code> parameter in the <code>preprocess</code> method.`,name:"do_resize"},{anchor:"transformers.DetrImageProcessor.size",description:`<strong>size</strong> (<code>Dict[str, int]</code>, <em>optional</em>, defaults to <code>{"shortest_edge": 800, "longest_edge": 1333}</code>) — | |
| Size of the image’s <code>(height, width)</code> dimensions after resizing. Can be overridden by the <code>size</code> parameter | |
| in the <code>preprocess</code> method. Available options are:<ul> | |
| <li><code>{"height": int, "width": int}</code>: The image will be resized to the exact size <code>(height, width)</code>. | |
| Do NOT keep the aspect ratio.</li> | |
| <li><code>{"shortest_edge": int, "longest_edge": int}</code>: The image will be resized to a maximum size respecting | |
| the aspect ratio and keeping the shortest edge less or equal to <code>shortest_edge</code> and the longest edge | |
| less or equal to <code>longest_edge</code>.</li> | |
| <li><code>{"max_height": int, "max_width": int}</code>: The image will be resized to the maximum size respecting the | |
| aspect ratio and keeping the height less or equal to <code>max_height</code> and the width less or equal to | |
| <code>max_width</code>.</li> | |
| </ul>`,name:"size"},{anchor:"transformers.DetrImageProcessor.resample",description:`<strong>resample</strong> (<code>PILImageResampling</code>, <em>optional</em>, defaults to <code>PILImageResampling.BILINEAR</code>) — | |
| Resampling filter to use if resizing the image.`,name:"resample"},{anchor:"transformers.DetrImageProcessor.do_rescale",description:`<strong>do_rescale</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Controls whether to rescale the image by the specified scale <code>rescale_factor</code>. Can be overridden by the | |
| <code>do_rescale</code> parameter in the <code>preprocess</code> method.`,name:"do_rescale"},{anchor:"transformers.DetrImageProcessor.rescale_factor",description:`<strong>rescale_factor</strong> (<code>int</code> or <code>float</code>, <em>optional</em>, defaults to <code>1/255</code>) — | |
| Scale factor to use if rescaling the image. Can be overridden by the <code>rescale_factor</code> parameter in the | |
| <code>preprocess</code> method.`,name:"rescale_factor"},{anchor:"transformers.DetrImageProcessor.do_normalize",description:`<strong>do_normalize</strong> (<code>bool</code>, <em>optional</em>, defaults to True) — | |
| Controls whether to normalize the image. Can be overridden by the <code>do_normalize</code> parameter in the | |
| <code>preprocess</code> method.`,name:"do_normalize"},{anchor:"transformers.DetrImageProcessor.image_mean",description:`<strong>image_mean</strong> (<code>float</code> or <code>List[float]</code>, <em>optional</em>, defaults to <code>IMAGENET_DEFAULT_MEAN</code>) — | |
| Mean values to use when normalizing the image. Can be a single value or a list of values, one for each | |
| channel. Can be overridden by the <code>image_mean</code> parameter in the <code>preprocess</code> method.`,name:"image_mean"},{anchor:"transformers.DetrImageProcessor.image_std",description:`<strong>image_std</strong> (<code>float</code> or <code>List[float]</code>, <em>optional</em>, defaults to <code>IMAGENET_DEFAULT_STD</code>) — | |
| Standard deviation values to use when normalizing the image. Can be a single value or a list of values, one | |
| for each channel. Can be overridden by the <code>image_std</code> parameter in the <code>preprocess</code> method.`,name:"image_std"},{anchor:"transformers.DetrImageProcessor.do_convert_annotations",description:`<strong>do_convert_annotations</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Controls whether to convert the annotations to the format expected by the DETR model. Converts the | |
| bounding boxes to the format <code>(center_x, center_y, width, height)</code> and in the range <code>[0, 1]</code>. | |
| Can be overridden by the <code>do_convert_annotations</code> parameter in the <code>preprocess</code> method.`,name:"do_convert_annotations"},{anchor:"transformers.DetrImageProcessor.do_pad",description:`<strong>do_pad</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) — | |
| Controls whether to pad the image. Can be overridden by the <code>do_pad</code> parameter in the <code>preprocess</code> | |
| method. If <code>True</code>, padding will be applied to the bottom and right of the image with zeros. | |
| If <code>pad_size</code> is provided, the image will be padded to the specified dimensions. | |
| Otherwise, the image will be padded to the maximum height and width of the batch.`,name:"do_pad"},{anchor:"transformers.DetrImageProcessor.pad_size",description:`<strong>pad_size</strong> (<code>Dict[str, int]</code>, <em>optional</em>) — | |
| The size <code>{"height": int, "width": int}</code> to pad the images to. Must be larger than any image size | |
| provided for preprocessing. If <code>pad_size</code> is not provided, images will be padded to the largest | |
| height and width in the batch.`,name:"pad_size"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/image_processing_detr.py#L787"}}),He=new M({props:{name:"preprocess",anchor:"transformers.DetrImageProcessor.preprocess",parameters:[{name:"images",val:": Union"},{name:"annotations",val:": Union = None"},{name:"return_segmentation_masks",val:": bool = None"},{name:"masks_path",val:": Union = None"},{name:"do_resize",val:": Optional = None"},{name:"size",val:": Optional = None"},{name:"resample",val:" = None"},{name:"do_rescale",val:": Optional = None"},{name:"rescale_factor",val:": Union = None"},{name:"do_normalize",val:": Optional = None"},{name:"do_convert_annotations",val:": Optional = None"},{name:"image_mean",val:": Union = None"},{name:"image_std",val:": Union = None"},{name:"do_pad",val:": Optional = None"},{name:"format",val:": Union = None"},{name:"return_tensors",val:": Union = None"},{name:"data_format",val:": Union = <ChannelDimension.FIRST: 'channels_first'>"},{name:"input_data_format",val:": Union = None"},{name:"pad_size",val:": Optional = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.DetrImageProcessor.preprocess.images",description:`<strong>images</strong> (<code>ImageInput</code>) — | |
| Image or batch of images to preprocess. Expects a single image or a batch of images with pixel values ranging | |
| from 0 to 255. If passing in images with pixel values between 0 and 1, set <code>do_rescale=False</code>.`,name:"images"},{anchor:"transformers.DetrImageProcessor.preprocess.annotations",description:`<strong>annotations</strong> (<code>AnnotationType</code> or <code>List[AnnotationType]</code>, <em>optional</em>) — | |
| List of annotations associated with the image or batch of images. If annotation is for object | |
| detection, the annotations should be a dictionary with the following keys:<ul> | |
| <li>“image_id” (<code>int</code>): The image id.</li> | |
| <li>“annotations” (<code>List[Dict]</code>): List of annotations for an image. Each annotation should be a | |
| dictionary. An image can have no annotations, in which case the list should be empty.</li> | |
| </ul>If annotation is for segmentation, the annotations should be a dictionary with the following keys:<ul> | |
| <li>“image_id” (<code>int</code>): The image id.</li> | |
| <li>“segments_info” (<code>List[Dict]</code>): List of segments for an image. Each segment should be a dictionary. | |
| An image can have no segments, in which case the list should be empty.</li> | |
| <li>“file_name” (<code>str</code>): The file name of the image.</li> | |
| </ul>`,name:"annotations"},{anchor:"transformers.DetrImageProcessor.preprocess.return_segmentation_masks",description:`<strong>return_segmentation_masks</strong> (<code>bool</code>, <em>optional</em>, defaults to self.return_segmentation_masks) — | |
| Whether to return segmentation masks.`,name:"return_segmentation_masks"},{anchor:"transformers.DetrImageProcessor.preprocess.masks_path",description:`<strong>masks_path</strong> (<code>str</code> or <code>pathlib.Path</code>, <em>optional</em>) — | |
| Path to the directory containing the segmentation masks.`,name:"masks_path"},{anchor:"transformers.DetrImageProcessor.preprocess.do_resize",description:`<strong>do_resize</strong> (<code>bool</code>, <em>optional</em>, defaults to self.do_resize) — | |
| Whether to resize the image.`,name:"do_resize"},{anchor:"transformers.DetrImageProcessor.preprocess.size",description:`<strong>size</strong> (<code>Dict[str, int]</code>, <em>optional</em>, defaults to self.size) — | |
| Size of the image’s <code>(height, width)</code> dimensions after resizing. Available options are:<ul> | |
| <li><code>{"height": int, "width": int}</code>: The image will be resized to the exact size <code>(height, width)</code>. | |
| Do NOT keep the aspect ratio.</li> | |
| <li><code>{"shortest_edge": int, "longest_edge": int}</code>: The image will be resized to a maximum size respecting | |
| the aspect ratio and keeping the shortest edge less than or equal to <code>shortest_edge</code> and the longest edge | |
| less than or equal to <code>longest_edge</code>.</li> | |
| <li><code>{"max_height": int, "max_width": int}</code>: The image will be resized to the maximum size respecting the | |
| aspect ratio and keeping the height less than or equal to <code>max_height</code> and the width less than or equal to | |
| <code>max_width</code>.</li> | |
| </ul>`,name:"size"},{anchor:"transformers.DetrImageProcessor.preprocess.resample",description:`<strong>resample</strong> (<code>PILImageResampling</code>, <em>optional</em>, defaults to self.resample) — | |
| Resampling filter to use when resizing the image.`,name:"resample"},{anchor:"transformers.DetrImageProcessor.preprocess.do_rescale",description:`<strong>do_rescale</strong> (<code>bool</code>, <em>optional</em>, defaults to self.do_rescale) — | |
| Whether to rescale the image.`,name:"do_rescale"},{anchor:"transformers.DetrImageProcessor.preprocess.rescale_factor",description:`<strong>rescale_factor</strong> (<code>float</code>, <em>optional</em>, defaults to self.rescale_factor) — | |
| Rescale factor to use when rescaling the image.`,name:"rescale_factor"},{anchor:"transformers.DetrImageProcessor.preprocess.do_normalize",description:`<strong>do_normalize</strong> (<code>bool</code>, <em>optional</em>, defaults to self.do_normalize) — | |
| Whether to normalize the image.`,name:"do_normalize"},{anchor:"transformers.DetrImageProcessor.preprocess.do_convert_annotations",description:`<strong>do_convert_annotations</strong> (<code>bool</code>, <em>optional</em>, defaults to self.do_convert_annotations) — | |
| Whether to convert the annotations to the format expected by the model. Converts the bounding | |
| boxes from the format <code>(top_left_x, top_left_y, width, height)</code> to <code>(center_x, center_y, width, height)</code> | |
| and in relative coordinates.`,name:"do_convert_annotations"},{anchor:"transformers.DetrImageProcessor.preprocess.image_mean",description:`<strong>image_mean</strong> (<code>float</code> or <code>List[float]</code>, <em>optional</em>, defaults to self.image_mean) — | |
| Mean to use when normalizing the image.`,name:"image_mean"},{anchor:"transformers.DetrImageProcessor.preprocess.image_std",description:`<strong>image_std</strong> (<code>float</code> or <code>List[float]</code>, <em>optional</em>, defaults to self.image_std) — | |
| Standard deviation to use when normalizing the image.`,name:"image_std"},{anchor:"transformers.DetrImageProcessor.preprocess.do_pad",description:`<strong>do_pad</strong> (<code>bool</code>, <em>optional</em>, defaults to self.do_pad) — | |
| Whether to pad the image. If <code>True</code>, padding will be applied to the bottom and right of | |
| the image with zeros. If <code>pad_size</code> is provided, the image will be padded to the specified | |
| dimensions. Otherwise, the image will be padded to the maximum height and width of the batch.`,name:"do_pad"},{anchor:"transformers.DetrImageProcessor.preprocess.format",description:`<strong>format</strong> (<code>str</code> or <code>AnnotationFormat</code>, <em>optional</em>, defaults to self.format) — | |
| Format of the annotations.`,name:"format"},{anchor:"transformers.DetrImageProcessor.preprocess.return_tensors",description:`<strong>return_tensors</strong> (<code>str</code> or <code>TensorType</code>, <em>optional</em>, defaults to self.return_tensors) — | |
| Type of tensors to return. If <code>None</code>, will return the list of images.`,name:"return_tensors"},{anchor:"transformers.DetrImageProcessor.preprocess.data_format",description:`<strong>data_format</strong> (<code>ChannelDimension</code> or <code>str</code>, <em>optional</em>, defaults to <code>ChannelDimension.FIRST</code>) — | |
| The channel dimension format for the output image. Can be one of:<ul> | |
| <li><code>"channels_first"</code> or <code>ChannelDimension.FIRST</code>: image in (num_channels, height, width) format.</li> | |
| <li><code>"channels_last"</code> or <code>ChannelDimension.LAST</code>: image in (height, width, num_channels) format.</li> | |
| <li>Unset: Use the channel dimension format of the input image.</li> | |
| </ul>`,name:"data_format"},{anchor:"transformers.DetrImageProcessor.preprocess.input_data_format",description:`<strong>input_data_format</strong> (<code>ChannelDimension</code> or <code>str</code>, <em>optional</em>) — | |
| The channel dimension format for the input image. If unset, the channel dimension format is inferred | |
| from the input image. Can be one of:<ul> | |
| <li><code>"channels_first"</code> or <code>ChannelDimension.FIRST</code>: image in (num_channels, height, width) format.</li> | |
| <li><code>"channels_last"</code> or <code>ChannelDimension.LAST</code>: image in (height, width, num_channels) format.</li> | |
| <li><code>"none"</code> or <code>ChannelDimension.NONE</code>: image in (height, width) format.</li> | |
| </ul>`,name:"input_data_format"},{anchor:"transformers.DetrImageProcessor.preprocess.pad_size",description:`<strong>pad_size</strong> (<code>Dict[str, int]</code>, <em>optional</em>) — | |
| The size <code>{"height": int, "width": int}</code> to pad the images to. Must be larger than any image size | |
| provided for preprocessing. If <code>pad_size</code> is not provided, images will be padded to the largest | |
| height and width in the batch.`,name:"pad_size"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/image_processing_detr.py#L1236"}}),Be=new M({props:{name:"post_process_object_detection",anchor:"transformers.DetrImageProcessor.post_process_object_detection",parameters:[{name:"outputs",val:""},{name:"threshold",val:": float = 0.5"},{name:"target_sizes",val:": Union = None"}],parametersDescription:[{anchor:"transformers.DetrImageProcessor.post_process_object_detection.outputs",description:`<strong>outputs</strong> (<code>DetrObjectDetectionOutput</code>) — | |
| Raw outputs of the model.`,name:"outputs"},{anchor:"transformers.DetrImageProcessor.post_process_object_detection.threshold",description:`<strong>threshold</strong> (<code>float</code>, <em>optional</em>) — | |
| Score threshold to keep object detection predictions.`,name:"threshold"},{anchor:"transformers.DetrImageProcessor.post_process_object_detection.target_sizes",description:`<strong>target_sizes</strong> (<code>torch.Tensor</code> or <code>List[Tuple[int, int]]</code>, <em>optional</em>) — | |
| Tensor of shape <code>(batch_size, 2)</code> or list of tuples (<code>Tuple[int, int]</code>) containing the target size | |
| <code>(height, width)</code> of each image in the batch. If unset, predictions will not be resized.`,name:"target_sizes"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/image_processing_detr.py#L1773",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A list of dictionaries, each dictionary containing the scores, labels and boxes for an image | |
| in the batch as predicted by the model.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[Dict]</code></p> | |
| `}}),Ge=new M({props:{name:"post_process_semantic_segmentation",anchor:"transformers.DetrImageProcessor.post_process_semantic_segmentation",parameters:[{name:"outputs",val:""},{name:"target_sizes",val:": List = None"}],parametersDescription:[{anchor:"transformers.DetrImageProcessor.post_process_semantic_segmentation.outputs",description:`<strong>outputs</strong> (<a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a>) — | |
| Raw outputs of the model.`,name:"outputs"},{anchor:"transformers.DetrImageProcessor.post_process_semantic_segmentation.target_sizes",description:`<strong>target_sizes</strong> (<code>List[Tuple[int, int]]</code>, <em>optional</em>) — | |
| A list of tuples (<code>Tuple[int, int]</code>) containing the target size (height, width) of each image in the | |
| batch. If unset, predictions will not be resized.`,name:"target_sizes"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/image_processing_detr.py#L1826",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A list of length <code>batch_size</code>, where each item is a semantic segmentation map of shape (height, width) | |
| corresponding to the target_sizes entry (if <code>target_sizes</code> is specified). Each entry of each | |
| <code>torch.Tensor</code> corresponds to a semantic class id.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[torch.Tensor]</code></p> | |
| `}}),Ae=new M({props:{name:"post_process_instance_segmentation",anchor:"transformers.DetrImageProcessor.post_process_instance_segmentation",parameters:[{name:"outputs",val:""},{name:"threshold",val:": float = 0.5"},{name:"mask_threshold",val:": float = 0.5"},{name:"overlap_mask_area_threshold",val:": float = 0.8"},{name:"target_sizes",val:": Optional = None"},{name:"return_coco_annotation",val:": Optional = False"}],parametersDescription:[{anchor:"transformers.DetrImageProcessor.post_process_instance_segmentation.outputs",description:`<strong>outputs</strong> (<a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a>) — | |
| Raw outputs of the model.`,name:"outputs"},{anchor:"transformers.DetrImageProcessor.post_process_instance_segmentation.threshold",description:`<strong>threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.5) — | |
| The probability score threshold to keep predicted instance masks.`,name:"threshold"},{anchor:"transformers.DetrImageProcessor.post_process_instance_segmentation.mask_threshold",description:`<strong>mask_threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.5) — | |
| Threshold to use when turning the predicted masks into binary values.`,name:"mask_threshold"},{anchor:"transformers.DetrImageProcessor.post_process_instance_segmentation.overlap_mask_area_threshold",description:`<strong>overlap_mask_area_threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.8) — | |
| The overlap mask area threshold to merge or discard small disconnected parts within each binary | |
| instance mask.`,name:"overlap_mask_area_threshold"},{anchor:"transformers.DetrImageProcessor.post_process_instance_segmentation.target_sizes",description:`<strong>target_sizes</strong> (<code>List[Tuple]</code>, <em>optional</em>) — | |
| List of length (batch_size), where each list item (<code>Tuple[int, int]</code>) corresponds to the requested | |
| final size (height, width) of each prediction. If unset, predictions will not be resized.`,name:"target_sizes"},{anchor:"transformers.DetrImageProcessor.post_process_instance_segmentation.return_coco_annotation",description:`<strong>return_coco_annotation</strong> (<code>bool</code>, <em>optional</em>) — | |
| Defaults to <code>False</code>. If set to <code>True</code>, segmentation maps are returned in COCO run-length encoding (RLE) | |
| format.`,name:"return_coco_annotation"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/image_processing_detr.py#L1874",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A list of dictionaries, one per image, each dictionary containing two keys:</p> | |
| <ul> | |
| <li><strong>segmentation</strong> — A tensor of shape <code>(height, width)</code> where each pixel represents a <code>segment_id</code> or | |
| <code>List[List]</code> run-length encoding (RLE) of the segmentation map if return_coco_annotation is set to | |
| <code>True</code>. Set to <code>None</code> if no mask is found above <code>threshold</code>.</li> | |
| <li><strong>segments_info</strong> — A dictionary that contains additional information on each segment.<ul> | |
| <li><strong>id</strong> — An integer representing the <code>segment_id</code>.</li> | |
| <li><strong>label_id</strong> — An integer representing the label / semantic class id corresponding to <code>segment_id</code>.</li> | |
| <li><strong>score</strong> — Prediction score of segment with <code>segment_id</code>.</li> | |
| </ul></li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[Dict]</code></p> | |
| `}}),Ve=new M({props:{name:"post_process_panoptic_segmentation",anchor:"transformers.DetrImageProcessor.post_process_panoptic_segmentation",parameters:[{name:"outputs",val:""},{name:"threshold",val:": float = 0.5"},{name:"mask_threshold",val:": float = 0.5"},{name:"overlap_mask_area_threshold",val:": float = 0.8"},{name:"label_ids_to_fuse",val:": Optional = None"},{name:"target_sizes",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.DetrImageProcessor.post_process_panoptic_segmentation.outputs",description:`<strong>outputs</strong> (<a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a>) — | |
| The outputs from <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a>.`,name:"outputs"},{anchor:"transformers.DetrImageProcessor.post_process_panoptic_segmentation.threshold",description:`<strong>threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.5) — | |
| The probability score threshold to keep predicted instance masks.`,name:"threshold"},{anchor:"transformers.DetrImageProcessor.post_process_panoptic_segmentation.mask_threshold",description:`<strong>mask_threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.5) — | |
| Threshold to use when turning the predicted masks into binary values.`,name:"mask_threshold"},{anchor:"transformers.DetrImageProcessor.post_process_panoptic_segmentation.overlap_mask_area_threshold",description:`<strong>overlap_mask_area_threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.8) — | |
| The overlap mask area threshold to merge or discard small disconnected parts within each binary | |
| instance mask.`,name:"overlap_mask_area_threshold"},{anchor:"transformers.DetrImageProcessor.post_process_panoptic_segmentation.label_ids_to_fuse",description:`<strong>label_ids_to_fuse</strong> (<code>Set[int]</code>, <em>optional</em>) — | |
| The labels in this set will have all their instances fused together. For instance, we could say | |
| there can only be one sky in an image, but several persons, so the label ID for sky would be in that | |
| set, but not the one for person.`,name:"label_ids_to_fuse"},{anchor:"transformers.DetrImageProcessor.post_process_panoptic_segmentation.target_sizes",description:`<strong>target_sizes</strong> (<code>List[Tuple]</code>, <em>optional</em>) — | |
| List of length (batch_size), where each list item (<code>Tuple[int, int]</code>) corresponds to the requested | |
| final size (height, width) of each prediction in batch. If unset, predictions will not be resized.`,name:"target_sizes"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/image_processing_detr.py#L1958",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A list of dictionaries, one per image, each dictionary containing two keys:</p> | |
| <ul> | |
| <li><strong>segmentation</strong> — a tensor of shape <code>(height, width)</code> where each pixel represents a <code>segment_id</code> or | |
| <code>None</code> if no mask is found above <code>threshold</code>. If <code>target_sizes</code> is specified, segmentation is resized to | |
| the corresponding <code>target_sizes</code> entry.</li> | |
| <li><strong>segments_info</strong> — A dictionary that contains additional information on each segment.<ul> | |
| <li><strong>id</strong> — an integer representing the <code>segment_id</code>.</li> | |
| <li><strong>label_id</strong> — An integer representing the label / semantic class id corresponding to <code>segment_id</code>.</li> | |
| <li><strong>was_fused</strong> — a boolean, <code>True</code> if <code>label_id</code> was in <code>label_ids_to_fuse</code>, <code>False</code> otherwise. | |
| Multiple instances of the same class / label were fused and assigned a single <code>segment_id</code>.</li> | |
| <li><strong>score</strong> — Prediction score of segment with <code>segment_id</code>.</li> | |
| </ul></li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[Dict]</code></p> | |
| `}}),Xe=new P({props:{title:"DetrFeatureExtractor",local:"transformers.DetrFeatureExtractor",headingTag:"h2"}}),Ye=new M({props:{name:"class transformers.DetrFeatureExtractor",anchor:"transformers.DetrFeatureExtractor",parameters:[{name:"*args",val:""},{name:"**kwargs",val:""}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/feature_extraction_detr.py#L36"}}),Qe=new M({props:{name:"__call__",anchor:"transformers.DetrFeatureExtractor.__call__",parameters:[{name:"images",val:""},{name:"**kwargs",val:""}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/image_processing_utils.py#L39"}}),Ke=new M({props:{name:"post_process_object_detection",anchor:"transformers.DetrFeatureExtractor.post_process_object_detection",parameters:[{name:"outputs",val:""},{name:"threshold",val:": float = 0.5"},{name:"target_sizes",val:": Union = None"}],parametersDescription:[{anchor:"transformers.DetrFeatureExtractor.post_process_object_detection.outputs",description:`<strong>outputs</strong> (<code>DetrObjectDetectionOutput</code>) — | |
| Raw outputs of the model.`,name:"outputs"},{anchor:"transformers.DetrFeatureExtractor.post_process_object_detection.threshold",description:`<strong>threshold</strong> (<code>float</code>, <em>optional</em>) — | |
| Score threshold to keep object detection predictions.`,name:"threshold"},{anchor:"transformers.DetrFeatureExtractor.post_process_object_detection.target_sizes",description:`<strong>target_sizes</strong> (<code>torch.Tensor</code> or <code>List[Tuple[int, int]]</code>, <em>optional</em>) — | |
| Tensor of shape <code>(batch_size, 2)</code> or list of tuples (<code>Tuple[int, int]</code>) containing the target size | |
| <code>(height, width)</code> of each image in the batch. If unset, predictions will not be resized.`,name:"target_sizes"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/image_processing_detr.py#L1773",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A list of dictionaries, each dictionary containing the scores, labels and boxes for an image | |
| in the batch as predicted by the model.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[Dict]</code></p> | |
| `}}),et=new M({props:{name:"post_process_semantic_segmentation",anchor:"transformers.DetrFeatureExtractor.post_process_semantic_segmentation",parameters:[{name:"outputs",val:""},{name:"target_sizes",val:": List = None"}],parametersDescription:[{anchor:"transformers.DetrFeatureExtractor.post_process_semantic_segmentation.outputs",description:`<strong>outputs</strong> (<a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a>) — | |
| Raw outputs of the model.`,name:"outputs"},{anchor:"transformers.DetrFeatureExtractor.post_process_semantic_segmentation.target_sizes",description:`<strong>target_sizes</strong> (<code>List[Tuple[int, int]]</code>, <em>optional</em>) — | |
| A list of tuples (<code>Tuple[int, int]</code>) containing the target size (height, width) of each image in the | |
| batch. If unset, predictions will not be resized.`,name:"target_sizes"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/image_processing_detr.py#L1826",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A list of length <code>batch_size</code>, where each item is a semantic segmentation map of shape (height, width) | |
| corresponding to the target_sizes entry (if <code>target_sizes</code> is specified). Each entry of each | |
| <code>torch.Tensor</code> corresponds to a semantic class id.</p> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[torch.Tensor]</code></p> | |
| `}}),tt=new M({props:{name:"post_process_instance_segmentation",anchor:"transformers.DetrFeatureExtractor.post_process_instance_segmentation",parameters:[{name:"outputs",val:""},{name:"threshold",val:": float = 0.5"},{name:"mask_threshold",val:": float = 0.5"},{name:"overlap_mask_area_threshold",val:": float = 0.8"},{name:"target_sizes",val:": Optional = None"},{name:"return_coco_annotation",val:": Optional = False"}],parametersDescription:[{anchor:"transformers.DetrFeatureExtractor.post_process_instance_segmentation.outputs",description:`<strong>outputs</strong> (<a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a>) — | |
| Raw outputs of the model.`,name:"outputs"},{anchor:"transformers.DetrFeatureExtractor.post_process_instance_segmentation.threshold",description:`<strong>threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.5) — | |
| The probability score threshold to keep predicted instance masks.`,name:"threshold"},{anchor:"transformers.DetrFeatureExtractor.post_process_instance_segmentation.mask_threshold",description:`<strong>mask_threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.5) — | |
| Threshold to use when turning the predicted masks into binary values.`,name:"mask_threshold"},{anchor:"transformers.DetrFeatureExtractor.post_process_instance_segmentation.overlap_mask_area_threshold",description:`<strong>overlap_mask_area_threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.8) — | |
| The overlap mask area threshold to merge or discard small disconnected parts within each binary | |
| instance mask.`,name:"overlap_mask_area_threshold"},{anchor:"transformers.DetrFeatureExtractor.post_process_instance_segmentation.target_sizes",description:`<strong>target_sizes</strong> (<code>List[Tuple]</code>, <em>optional</em>) — | |
| List of length (batch_size), where each list item (<code>Tuple[int, int]</code>) corresponds to the requested | |
| final size (height, width) of each prediction. If unset, predictions will not be resized.`,name:"target_sizes"},{anchor:"transformers.DetrFeatureExtractor.post_process_instance_segmentation.return_coco_annotation",description:`<strong>return_coco_annotation</strong> (<code>bool</code>, <em>optional</em>) — | |
| Defaults to <code>False</code>. If set to <code>True</code>, segmentation maps are returned in COCO run-length encoding (RLE) | |
| format.`,name:"return_coco_annotation"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/image_processing_detr.py#L1874",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A list of dictionaries, one per image, each dictionary containing two keys:</p> | |
| <ul> | |
| <li><strong>segmentation</strong> — A tensor of shape <code>(height, width)</code> where each pixel represents a <code>segment_id</code> or | |
| <code>List[List]</code> run-length encoding (RLE) of the segmentation map if return_coco_annotation is set to | |
| <code>True</code>. Set to <code>None</code> if no mask is found above <code>threshold</code>.</li> | |
| <li><strong>segments_info</strong> — A dictionary that contains additional information on each segment.<ul> | |
| <li><strong>id</strong> — An integer representing the <code>segment_id</code>.</li> | |
| <li><strong>label_id</strong> — An integer representing the label / semantic class id corresponding to <code>segment_id</code>.</li> | |
| <li><strong>score</strong> — Prediction score of segment with <code>segment_id</code>.</li> | |
| </ul></li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[Dict]</code></p> | |
| `}}),ot=new M({props:{name:"post_process_panoptic_segmentation",anchor:"transformers.DetrFeatureExtractor.post_process_panoptic_segmentation",parameters:[{name:"outputs",val:""},{name:"threshold",val:": float = 0.5"},{name:"mask_threshold",val:": float = 0.5"},{name:"overlap_mask_area_threshold",val:": float = 0.8"},{name:"label_ids_to_fuse",val:": Optional = None"},{name:"target_sizes",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.DetrFeatureExtractor.post_process_panoptic_segmentation.outputs",description:`<strong>outputs</strong> (<a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a>) — | |
| The outputs from <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrForSegmentation">DetrForSegmentation</a>.`,name:"outputs"},{anchor:"transformers.DetrFeatureExtractor.post_process_panoptic_segmentation.threshold",description:`<strong>threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.5) — | |
| The probability score threshold to keep predicted instance masks.`,name:"threshold"},{anchor:"transformers.DetrFeatureExtractor.post_process_panoptic_segmentation.mask_threshold",description:`<strong>mask_threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.5) — | |
| Threshold to use when turning the predicted masks into binary values.`,name:"mask_threshold"},{anchor:"transformers.DetrFeatureExtractor.post_process_panoptic_segmentation.overlap_mask_area_threshold",description:`<strong>overlap_mask_area_threshold</strong> (<code>float</code>, <em>optional</em>, defaults to 0.8) — | |
| The overlap mask area threshold to merge or discard small disconnected parts within each binary | |
| instance mask.`,name:"overlap_mask_area_threshold"},{anchor:"transformers.DetrFeatureExtractor.post_process_panoptic_segmentation.label_ids_to_fuse",description:`<strong>label_ids_to_fuse</strong> (<code>Set[int]</code>, <em>optional</em>) — | |
| The labels in this set will have all their instances fused together. For instance, we could say | |
| there can only be one sky in an image, but several persons, so the label ID for sky would be in that | |
| set, but not the one for person.`,name:"label_ids_to_fuse"},{anchor:"transformers.DetrFeatureExtractor.post_process_panoptic_segmentation.target_sizes",description:`<strong>target_sizes</strong> (<code>List[Tuple]</code>, <em>optional</em>) — | |
| List of length (batch_size), where each list item (<code>Tuple[int, int]</code>) corresponds to the requested | |
| final size (height, width) of each prediction in batch. If unset, predictions will not be resized.`,name:"target_sizes"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/image_processing_detr.py#L1958",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A list of dictionaries, one per image, each dictionary containing two keys:</p> | |
| <ul> | |
| <li><strong>segmentation</strong> — a tensor of shape <code>(height, width)</code> where each pixel represents a <code>segment_id</code> or | |
| <code>None</code> if no mask is found above <code>threshold</code>. If <code>target_sizes</code> is specified, segmentation is resized to | |
| the corresponding <code>target_sizes</code> entry.</li> | |
| <li><strong>segments_info</strong> — A dictionary that contains additional information on each segment.<ul> | |
| <li><strong>id</strong> — an integer representing the <code>segment_id</code>.</li> | |
| <li><strong>label_id</strong> — An integer representing the label / semantic class id corresponding to <code>segment_id</code>.</li> | |
| <li><strong>was_fused</strong> — a boolean, <code>True</code> if <code>label_id</code> was in <code>label_ids_to_fuse</code>, <code>False</code> otherwise. | |
| Multiple instances of the same class / label were fused and assigned a single <code>segment_id</code>.</li> | |
| <li><strong>score</strong> — Prediction score of segment with <code>segment_id</code>.</li> | |
| </ul></li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><code>List[Dict]</code></p> | |
| `}}),nt=new P({props:{title:"DETR specific outputs",local:"transformers.models.detr.modeling_detr.DetrModelOutput",headingTag:"h2"}}),st=new M({props:{name:"class transformers.models.detr.modeling_detr.DetrModelOutput",anchor:"transformers.models.detr.modeling_detr.DetrModelOutput",parameters:[{name:"last_hidden_state",val:": FloatTensor = None"},{name:"past_key_values",val:": Optional = None"},{name:"decoder_hidden_states",val:": Optional = None"},{name:"decoder_attentions",val:": Optional = None"},{name:"cross_attentions",val:": Optional = None"},{name:"encoder_last_hidden_state",val:": Optional = None"},{name:"encoder_hidden_states",val:": Optional = None"},{name:"encoder_attentions",val:": Optional = None"},{name:"intermediate_hidden_states",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.models.detr.modeling_detr.DetrModelOutput.last_hidden_state",description:`<strong>last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>) — | |
| Sequence of hidden-states at the output of the last layer of the decoder of the model.`,name:"last_hidden_state"},{anchor:"transformers.models.detr.modeling_detr.DetrModelOutput.decoder_hidden_states",description:`<strong>decoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of | |
| shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the decoder at the output of each | |
| layer plus the initial embedding outputs.`,name:"decoder_hidden_states"},{anchor:"transformers.models.detr.modeling_detr.DetrModelOutput.decoder_attentions",description:`<strong>decoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the decoder, after the attention softmax, used to compute the | |
| weighted average in the self-attention heads.`,name:"decoder_attentions"},{anchor:"transformers.models.detr.modeling_detr.DetrModelOutput.cross_attentions",description:`<strong>cross_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the decoder’s cross-attention layer, after the attention softmax, | |
| used to compute the weighted average in the cross-attention heads.`,name:"cross_attentions"},{anchor:"transformers.models.detr.modeling_detr.DetrModelOutput.encoder_last_hidden_state",description:`<strong>encoder_last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Sequence of hidden-states at the output of the last layer of the encoder of the model.`,name:"encoder_last_hidden_state"},{anchor:"transformers.models.detr.modeling_detr.DetrModelOutput.encoder_hidden_states",description:`<strong>encoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of | |
| shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the encoder at the output of each | |
| layer plus the initial embedding outputs.`,name:"encoder_hidden_states"},{anchor:"transformers.models.detr.modeling_detr.DetrModelOutput.encoder_attentions",description:`<strong>encoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the encoder, after the attention softmax, used to compute the | |
| weighted average in the self-attention heads.`,name:"encoder_attentions"},{anchor:"transformers.models.detr.modeling_detr.DetrModelOutput.intermediate_hidden_states",description:`<strong>intermediate_hidden_states</strong> (<code>torch.FloatTensor</code> of shape <code>(config.decoder_layers, batch_size, sequence_length, hidden_size)</code>, <em>optional</em>, returned when <code>config.auxiliary_loss=True</code>) — | |
| Intermediate decoder activations, i.e. the output of each decoder layer, each of them gone through a | |
| layernorm.`,name:"intermediate_hidden_states"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/modeling_detr.py#L95"}}),rt=new M({props:{name:"class transformers.models.detr.modeling_detr.DetrObjectDetectionOutput",anchor:"transformers.models.detr.modeling_detr.DetrObjectDetectionOutput",parameters:[{name:"loss",val:": Optional = None"},{name:"loss_dict",val:": Optional = None"},{name:"logits",val:": FloatTensor = None"},{name:"pred_boxes",val:": FloatTensor = None"},{name:"auxiliary_outputs",val:": Optional = None"},{name:"last_hidden_state",val:": Optional = None"},{name:"decoder_hidden_states",val:": Optional = None"},{name:"decoder_attentions",val:": Optional = None"},{name:"cross_attentions",val:": Optional = None"},{name:"encoder_last_hidden_state",val:": Optional = None"},{name:"encoder_hidden_states",val:": Optional = None"},{name:"encoder_attentions",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.models.detr.modeling_detr.DetrObjectDetectionOutput.loss",description:`<strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> are provided)) — | |
| Total loss as a linear combination of a negative log-likelihood (cross-entropy) for class prediction and a | |
| bounding box loss. The latter is defined as a linear combination of the L1 loss and the generalized | |
| scale-invariant IoU loss.`,name:"loss"},{anchor:"transformers.models.detr.modeling_detr.DetrObjectDetectionOutput.loss_dict",description:`<strong>loss_dict</strong> (<code>Dict</code>, <em>optional</em>) — | |
| A dictionary containing the individual losses. Useful for logging.`,name:"loss_dict"},{anchor:"transformers.models.detr.modeling_detr.DetrObjectDetectionOutput.logits",description:`<strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, num_classes + 1)</code>) — | |
| Classification logits (including no-object) for all queries.`,name:"logits"},{anchor:"transformers.models.detr.modeling_detr.DetrObjectDetectionOutput.pred_boxes",description:`<strong>pred_boxes</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, 4)</code>) — | |
| Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These | |
| values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding | |
| possible padding). You can use <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor.post_process_object_detection">post_process_object_detection()</a> to retrieve the | |
| unnormalized bounding boxes.`,name:"pred_boxes"},{anchor:"transformers.models.detr.modeling_detr.DetrObjectDetectionOutput.auxiliary_outputs",description:`<strong>auxiliary_outputs</strong> (<code>list[Dict]</code>, <em>optional</em>) — | |
| Optional, only returned when auxiliary losses are activated (i.e. <code>config.auxiliary_loss</code> is set to <code>True</code>) | |
| and labels are provided. It is a list of dictionaries containing the two above keys (<code>logits</code> and | |
| <code>pred_boxes</code>) for each decoder layer.`,name:"auxiliary_outputs"},{anchor:"transformers.models.detr.modeling_detr.DetrObjectDetectionOutput.last_hidden_state",description:`<strong>last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Sequence of hidden-states at the output of the last layer of the decoder of the model.`,name:"last_hidden_state"},{anchor:"transformers.models.detr.modeling_detr.DetrObjectDetectionOutput.decoder_hidden_states",description:`<strong>decoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of | |
| shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the decoder at the output of each | |
| layer plus the initial embedding outputs.`,name:"decoder_hidden_states"},{anchor:"transformers.models.detr.modeling_detr.DetrObjectDetectionOutput.decoder_attentions",description:`<strong>decoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the decoder, after the attention softmax, used to compute the | |
| weighted average in the self-attention heads.`,name:"decoder_attentions"},{anchor:"transformers.models.detr.modeling_detr.DetrObjectDetectionOutput.cross_attentions",description:`<strong>cross_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the decoder’s cross-attention layer, after the attention softmax, | |
| used to compute the weighted average in the cross-attention heads.`,name:"cross_attentions"},{anchor:"transformers.models.detr.modeling_detr.DetrObjectDetectionOutput.encoder_last_hidden_state",description:`<strong>encoder_last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Sequence of hidden-states at the output of the last layer of the encoder of the model.`,name:"encoder_last_hidden_state"},{anchor:"transformers.models.detr.modeling_detr.DetrObjectDetectionOutput.encoder_hidden_states",description:`<strong>encoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of | |
| shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the encoder at the output of each | |
| layer plus the initial embedding outputs.`,name:"encoder_hidden_states"},{anchor:"transformers.models.detr.modeling_detr.DetrObjectDetectionOutput.encoder_attentions",description:`<strong>encoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the encoder, after the attention softmax, used to compute the | |
| weighted average in the self-attention heads.`,name:"encoder_attentions"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/modeling_detr.py#L135"}}),at=new M({props:{name:"class transformers.models.detr.modeling_detr.DetrSegmentationOutput",anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput",parameters:[{name:"loss",val:": Optional = None"},{name:"loss_dict",val:": Optional = None"},{name:"logits",val:": FloatTensor = None"},{name:"pred_boxes",val:": FloatTensor = None"},{name:"pred_masks",val:": FloatTensor = None"},{name:"auxiliary_outputs",val:": Optional = None"},{name:"last_hidden_state",val:": Optional = None"},{name:"decoder_hidden_states",val:": Optional = None"},{name:"decoder_attentions",val:": Optional = None"},{name:"cross_attentions",val:": Optional = None"},{name:"encoder_last_hidden_state",val:": Optional = None"},{name:"encoder_hidden_states",val:": Optional = None"},{name:"encoder_attentions",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput.loss",description:`<strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> are provided)) — | |
| Total loss as a linear combination of a negative log-likelihood (cross-entropy) for class prediction and a | |
| bounding box loss. The latter is defined as a linear combination of the L1 loss and the generalized | |
| scale-invariant IoU loss.`,name:"loss"},{anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput.loss_dict",description:`<strong>loss_dict</strong> (<code>Dict</code>, <em>optional</em>) — | |
| A dictionary containing the individual losses. Useful for logging.`,name:"loss_dict"},{anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput.logits",description:`<strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, num_classes + 1)</code>) — | |
| Classification logits (including no-object) for all queries.`,name:"logits"},{anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput.pred_boxes",description:`<strong>pred_boxes</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, 4)</code>) — | |
| Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These | |
| values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding | |
| possible padding). You can use <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor.post_process_object_detection">post_process_object_detection()</a> to retrieve the | |
| unnormalized bounding boxes.`,name:"pred_boxes"},{anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput.pred_masks",description:`<strong>pred_masks</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, height/4, width/4)</code>) — | |
| Segmentation masks logits for all queries. See also | |
| <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor.post_process_semantic_segmentation">post_process_semantic_segmentation()</a>, | |
| <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor.post_process_instance_segmentation">post_process_instance_segmentation()</a> or | |
| <a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor.post_process_panoptic_segmentation">post_process_panoptic_segmentation()</a> to evaluate semantic, instance and panoptic | |
| segmentation masks respectively.`,name:"pred_masks"},{anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput.auxiliary_outputs",description:`<strong>auxiliary_outputs</strong> (<code>list[Dict]</code>, <em>optional</em>) — | |
| Optional, only returned when auxiliary losses are activated (i.e. <code>config.auxiliary_loss</code> is set to <code>True</code>) | |
| and labels are provided. It is a list of dictionaries containing the two above keys (<code>logits</code> and | |
| <code>pred_boxes</code>) for each decoder layer.`,name:"auxiliary_outputs"},{anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput.last_hidden_state",description:`<strong>last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Sequence of hidden-states at the output of the last layer of the decoder of the model.`,name:"last_hidden_state"},{anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput.decoder_hidden_states",description:`<strong>decoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of | |
| shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the decoder at the output of each | |
| layer plus the initial embedding outputs.`,name:"decoder_hidden_states"},{anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput.decoder_attentions",description:`<strong>decoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the decoder, after the attention softmax, used to compute the | |
| weighted average in the self-attention heads.`,name:"decoder_attentions"},{anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput.cross_attentions",description:`<strong>cross_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the decoder’s cross-attention layer, after the attention softmax, | |
| used to compute the weighted average in the cross-attention heads.`,name:"cross_attentions"},{anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput.encoder_last_hidden_state",description:`<strong>encoder_last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Sequence of hidden-states at the output of the last layer of the encoder of the model.`,name:"encoder_last_hidden_state"},{anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput.encoder_hidden_states",description:`<strong>encoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of | |
| shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the encoder at the output of each | |
| layer plus the initial embedding outputs.`,name:"encoder_hidden_states"},{anchor:"transformers.models.detr.modeling_detr.DetrSegmentationOutput.encoder_attentions",description:`<strong>encoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — | |
| Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the encoder, after the attention softmax, used to compute the | |
| weighted average in the self-attention heads.`,name:"encoder_attentions"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/modeling_detr.py#L198"}}),it=new P({props:{title:"DetrModel",local:"transformers.DetrModel",headingTag:"h2"}}),dt=new M({props:{name:"class transformers.DetrModel",anchor:"transformers.DetrModel",parameters:[{name:"config",val:": DetrConfig"}],parametersDescription:[{anchor:"transformers.DetrModel.config",description:`<strong>config</strong> (<a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrConfig">DetrConfig</a>) — | |
| Model configuration class with all the parameters of the model. Initializing with a config file does not | |
| load the weights associated with the model, only the configuration. Check out the | |
| <a href="/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/modeling_detr.py#L1178"}}),ct=new M({props:{name:"forward",anchor:"transformers.DetrModel.forward",parameters:[{name:"pixel_values",val:": FloatTensor"},{name:"pixel_mask",val:": Optional = None"},{name:"decoder_attention_mask",val:": Optional = None"},{name:"encoder_outputs",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"decoder_inputs_embeds",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.DetrModel.forward.pixel_values",description:`<strong>pixel_values</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_channels, height, width)</code>) — | |
| Pixel values. Padding will be ignored by default should you provide it.</p> | |
| <p>Pixel values can be obtained using <a href="/docs/transformers/main/en/model_doc/auto#transformers.AutoImageProcessor">AutoImageProcessor</a>. See <a href="/docs/transformers/main/en/model_doc/deit#transformers.DeiTFeatureExtractor.__call__">DetrImageProcessor.<strong>call</strong>()</a> for details.`,name:"pixel_values"},{anchor:"transformers.DetrModel.forward.pixel_mask",description:`<strong>pixel_mask</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, height, width)</code>, <em>optional</em>) — | |
| Mask to avoid performing attention on padding pixel values. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 for pixels that are real (i.e. <strong>not masked</strong>),</li> | |
| <li>0 for pixels that are padding (i.e. <strong>masked</strong>).</li> | |
| </ul> | |
| <p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"pixel_mask"},{anchor:"transformers.DetrModel.forward.decoder_attention_mask",description:`<strong>decoder_attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries)</code>, <em>optional</em>) — | |
| Not used by default. Can be used to mask object queries.`,name:"decoder_attention_mask"},{anchor:"transformers.DetrModel.forward.encoder_outputs",description:`<strong>encoder_outputs</strong> (<code>tuple(tuple(torch.FloatTensor)</code>, <em>optional</em>) — | |
| Tuple consists of (<code>last_hidden_state</code>, <em>optional</em>: <code>hidden_states</code>, <em>optional</em>: <code>attentions</code>) | |
| <code>last_hidden_state</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) is a sequence of | |
| hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder.`,name:"encoder_outputs"},{anchor:"transformers.DetrModel.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of passing the flattened feature map (output of the backbone + projection layer), you | |
| can choose to directly pass a flattened representation of an image.`,name:"inputs_embeds"},{anchor:"transformers.DetrModel.forward.decoder_inputs_embeds",description:`<strong>decoder_inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of initializing the queries with a tensor of zeros, you can choose to directly pass an | |
| embedded representation.`,name:"decoder_inputs_embeds"},{anchor:"transformers.DetrModel.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.DetrModel.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.DetrModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <a href="/docs/transformers/main/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/modeling_detr.py#L1219",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.models.detr.modeling_detr.DetrModelOutput" | |
| >transformers.models.detr.modeling_detr.DetrModelOutput</a> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.DetrConfig" | |
| >DetrConfig</a>) and inputs.</p> | |
| <ul> | |
| <li><strong>last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>) — Sequence of hidden-states at the output of the last layer of the decoder of the model.</li> | |
| <li><strong>decoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of | |
| shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the decoder at the output of each | |
| layer plus the initial embedding outputs.</li> | |
| <li><strong>decoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the decoder, after the attention softmax, used to compute the | |
| weighted average in the self-attention heads.</li> | |
| <li><strong>cross_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the decoder’s cross-attention layer, after the attention softmax, | |
| used to compute the weighted average in the cross-attention heads.</li> | |
| <li><strong>encoder_last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the encoder of the model.</li> | |
| <li><strong>encoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of | |
| shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the encoder at the output of each | |
| layer plus the initial embedding outputs.</li> | |
| <li><strong>encoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the encoder, after the attention softmax, used to compute the | |
| weighted average in the self-attention heads.</li> | |
| <li><strong>intermediate_hidden_states</strong> (<code>torch.FloatTensor</code> of shape <code>(config.decoder_layers, batch_size, sequence_length, hidden_size)</code>, <em>optional</em>, returned when <code>config.auxiliary_loss=True</code>) — Intermediate decoder activations, i.e. the output of each decoder layer, each of them gone through a | |
| layernorm.</li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.models.detr.modeling_detr.DetrModelOutput" | |
| >transformers.models.detr.modeling_detr.DetrModelOutput</a> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),se=new es({props:{$$slots:{default:[sr]},$$scope:{ctx:z}}}),re=new on({props:{anchor:"transformers.DetrModel.forward.example",$$slots:{default:[rr]},$$scope:{ctx:z}}}),lt=new P({props:{title:"DetrForObjectDetection",local:"transformers.DetrForObjectDetection",headingTag:"h2"}}),mt=new M({props:{name:"class transformers.DetrForObjectDetection",anchor:"transformers.DetrForObjectDetection",parameters:[{name:"config",val:": DetrConfig"}],parametersDescription:[{anchor:"transformers.DetrForObjectDetection.config",description:`<strong>config</strong> (<a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrConfig">DetrConfig</a>) — | |
| Model configuration class with all the parameters of the model. Initializing with a config file does not | |
| load the weights associated with the model, only the configuration. Check out the | |
| <a href="/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/modeling_detr.py#L1346"}}),ht=new M({props:{name:"forward",anchor:"transformers.DetrForObjectDetection.forward",parameters:[{name:"pixel_values",val:": FloatTensor"},{name:"pixel_mask",val:": Optional = None"},{name:"decoder_attention_mask",val:": Optional = None"},{name:"encoder_outputs",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"decoder_inputs_embeds",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.DetrForObjectDetection.forward.pixel_values",description:`<strong>pixel_values</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_channels, height, width)</code>) — | |
| Pixel values. Padding will be ignored by default should you provide it.</p> | |
| <p>Pixel values can be obtained using <a href="/docs/transformers/main/en/model_doc/auto#transformers.AutoImageProcessor">AutoImageProcessor</a>. See <a href="/docs/transformers/main/en/model_doc/deit#transformers.DeiTFeatureExtractor.__call__">DetrImageProcessor.<strong>call</strong>()</a> for details.`,name:"pixel_values"},{anchor:"transformers.DetrForObjectDetection.forward.pixel_mask",description:`<strong>pixel_mask</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, height, width)</code>, <em>optional</em>) — | |
| Mask to avoid performing attention on padding pixel values. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 for pixels that are real (i.e. <strong>not masked</strong>),</li> | |
| <li>0 for pixels that are padding (i.e. <strong>masked</strong>).</li> | |
| </ul> | |
| <p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"pixel_mask"},{anchor:"transformers.DetrForObjectDetection.forward.decoder_attention_mask",description:`<strong>decoder_attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries)</code>, <em>optional</em>) — | |
| Not used by default. Can be used to mask object queries.`,name:"decoder_attention_mask"},{anchor:"transformers.DetrForObjectDetection.forward.encoder_outputs",description:`<strong>encoder_outputs</strong> (<code>tuple(tuple(torch.FloatTensor)</code>, <em>optional</em>) — | |
| Tuple consists of (<code>last_hidden_state</code>, <em>optional</em>: <code>hidden_states</code>, <em>optional</em>: <code>attentions</code>) | |
| <code>last_hidden_state</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) is a sequence of | |
| hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder.`,name:"encoder_outputs"},{anchor:"transformers.DetrForObjectDetection.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of passing the flattened feature map (output of the backbone + projection layer), you | |
| can choose to directly pass a flattened representation of an image.`,name:"inputs_embeds"},{anchor:"transformers.DetrForObjectDetection.forward.decoder_inputs_embeds",description:`<strong>decoder_inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of initializing the queries with a tensor of zeros, you can choose to directly pass an | |
| embedded representation.`,name:"decoder_inputs_embeds"},{anchor:"transformers.DetrForObjectDetection.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.DetrForObjectDetection.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.DetrForObjectDetection.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <a href="/docs/transformers/main/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.DetrForObjectDetection.forward.labels",description:`<strong>labels</strong> (<code>List[Dict]</code> of len <code>(batch_size,)</code>, <em>optional</em>) — | |
| Labels for computing the bipartite matching loss. List of dicts, each dictionary containing at least the | |
| following 2 keys: ‘class_labels’ and ‘boxes’ (the class labels and bounding boxes of an image in the batch | |
| respectively). The class labels themselves should be a <code>torch.LongTensor</code> of len <code>(number of bounding boxes in the image,)</code> and the boxes a <code>torch.FloatTensor</code> of shape <code>(number of bounding boxes in the image, 4)</code>.`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/modeling_detr.py#L1379",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.models.detr.modeling_detr.DetrObjectDetectionOutput" | |
| >transformers.models.detr.modeling_detr.DetrObjectDetectionOutput</a> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.DetrConfig" | |
| >DetrConfig</a>) and inputs.</p> | |
| <ul> | |
| <li><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> are provided) — Total loss as a linear combination of a negative log-likelihood (cross-entropy) for class prediction and a | |
| bounding box loss. The latter is defined as a linear combination of the L1 loss and the generalized | |
| scale-invariant IoU loss.</li> | |
| <li><strong>loss_dict</strong> (<code>Dict</code>, <em>optional</em>) — A dictionary containing the individual losses. Useful for logging.</li> | |
| <li><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, num_classes + 1)</code>) — Classification logits (including no-object) for all queries.</li> | |
| <li><strong>pred_boxes</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, 4)</code>) — Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These | |
| values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding | |
| possible padding). You can use <a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor.post_process_object_detection" | |
| >post_process_object_detection()</a> to retrieve the | |
| unnormalized bounding boxes.</li> | |
| <li><strong>auxiliary_outputs</strong> (<code>list[Dict]</code>, <em>optional</em>) — Optional, only returned when auxiliary losses are activated (i.e. <code>config.auxiliary_loss</code> is set to <code>True</code>) | |
| and labels are provided. It is a list of dictionaries containing the two above keys (<code>logits</code> and | |
| <code>pred_boxes</code>) for each decoder layer.</li> | |
| <li><strong>last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the decoder of the model.</li> | |
| <li><strong>decoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of | |
| shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the decoder at the output of each | |
| layer plus the initial embedding outputs.</li> | |
| <li><strong>decoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the decoder, after the attention softmax, used to compute the | |
| weighted average in the self-attention heads.</li> | |
| <li><strong>cross_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the decoder’s cross-attention layer, after the attention softmax, | |
| used to compute the weighted average in the cross-attention heads.</li> | |
| <li><strong>encoder_last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the encoder of the model.</li> | |
| <li><strong>encoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of | |
| shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the encoder at the output of each | |
| layer plus the initial embedding outputs.</li> | |
| <li><strong>encoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the encoder, after the attention softmax, used to compute the | |
| weighted average in the self-attention heads.</li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.models.detr.modeling_detr.DetrObjectDetectionOutput" | |
| >transformers.models.detr.modeling_detr.DetrObjectDetectionOutput</a> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),ae=new es({props:{$$slots:{default:[ar]},$$scope:{ctx:z}}}),ie=new on({props:{anchor:"transformers.DetrForObjectDetection.forward.example",$$slots:{default:[ir]},$$scope:{ctx:z}}}),pt=new P({props:{title:"DetrForSegmentation",local:"transformers.DetrForSegmentation",headingTag:"h2"}}),gt=new M({props:{name:"class transformers.DetrForSegmentation",anchor:"transformers.DetrForSegmentation",parameters:[{name:"config",val:": DetrConfig"}],parametersDescription:[{anchor:"transformers.DetrForSegmentation.config",description:`<strong>config</strong> (<a href="/docs/transformers/main/en/model_doc/detr#transformers.DetrConfig">DetrConfig</a>) — | |
| Model configuration class with all the parameters of the model. Initializing with a config file does not | |
| load the weights associated with the model, only the configuration. Check out the | |
| <a href="/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.from_pretrained">from_pretrained()</a> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/modeling_detr.py#L1519"}}),ut=new M({props:{name:"forward",anchor:"transformers.DetrForSegmentation.forward",parameters:[{name:"pixel_values",val:": FloatTensor"},{name:"pixel_mask",val:": Optional = None"},{name:"decoder_attention_mask",val:": Optional = None"},{name:"encoder_outputs",val:": Optional = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"decoder_inputs_embeds",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.DetrForSegmentation.forward.pixel_values",description:`<strong>pixel_values</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_channels, height, width)</code>) — | |
| Pixel values. Padding will be ignored by default should you provide it.</p> | |
| <p>Pixel values can be obtained using <a href="/docs/transformers/main/en/model_doc/auto#transformers.AutoImageProcessor">AutoImageProcessor</a>. See <a href="/docs/transformers/main/en/model_doc/deit#transformers.DeiTFeatureExtractor.__call__">DetrImageProcessor.<strong>call</strong>()</a> for details.`,name:"pixel_values"},{anchor:"transformers.DetrForSegmentation.forward.pixel_mask",description:`<strong>pixel_mask</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, height, width)</code>, <em>optional</em>) — | |
| Mask to avoid performing attention on padding pixel values. Mask values selected in <code>[0, 1]</code>:</p> | |
| <ul> | |
| <li>1 for pixels that are real (i.e. <strong>not masked</strong>),</li> | |
| <li>0 for pixels that are padding (i.e. <strong>masked</strong>).</li> | |
| </ul> | |
| <p><a href="../glossary#attention-mask">What are attention masks?</a>`,name:"pixel_mask"},{anchor:"transformers.DetrForSegmentation.forward.decoder_attention_mask",description:`<strong>decoder_attention_mask</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries)</code>, <em>optional</em>) — | |
| Not used by default. Can be used to mask object queries.`,name:"decoder_attention_mask"},{anchor:"transformers.DetrForSegmentation.forward.encoder_outputs",description:`<strong>encoder_outputs</strong> (<code>tuple(tuple(torch.FloatTensor)</code>, <em>optional</em>) — | |
| Tuple consists of (<code>last_hidden_state</code>, <em>optional</em>: <code>hidden_states</code>, <em>optional</em>: <code>attentions</code>) | |
| <code>last_hidden_state</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) is a sequence of | |
| hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder.`,name:"encoder_outputs"},{anchor:"transformers.DetrForSegmentation.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of passing the flattened feature map (output of the backbone + projection layer), you | |
| can choose to directly pass a flattened representation of an image.`,name:"inputs_embeds"},{anchor:"transformers.DetrForSegmentation.forward.decoder_inputs_embeds",description:`<strong>decoder_inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, hidden_size)</code>, <em>optional</em>) — | |
| Optionally, instead of initializing the queries with a tensor of zeros, you can choose to directly pass an | |
| embedded representation.`,name:"decoder_inputs_embeds"},{anchor:"transformers.DetrForSegmentation.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned | |
| tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.DetrForSegmentation.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for | |
| more detail.`,name:"output_hidden_states"},{anchor:"transformers.DetrForSegmentation.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) — | |
| Whether or not to return a <a href="/docs/transformers/main/en/main_classes/output#transformers.utils.ModelOutput">ModelOutput</a> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.DetrForSegmentation.forward.labels",description:`<strong>labels</strong> (<code>List[Dict]</code> of len <code>(batch_size,)</code>, <em>optional</em>) — | |
| Labels for computing the bipartite matching loss, DICE/F-1 loss and Focal loss. List of dicts, each | |
| dictionary containing at least the following 3 keys: ‘class_labels’, ‘boxes’ and ‘masks’ (the class labels, | |
| bounding boxes and segmentation masks of an image in the batch respectively). The class labels themselves | |
| should be a <code>torch.LongTensor</code> of len <code>(number of bounding boxes in the image,)</code>, the boxes a | |
| <code>torch.FloatTensor</code> of shape <code>(number of bounding boxes in the image, 4)</code> and the masks a | |
| <code>torch.FloatTensor</code> of shape <code>(number of bounding boxes in the image, height, width)</code>.`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/main/src/transformers/models/detr/modeling_detr.py#L1549",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p>A <a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.models.detr.modeling_detr.DetrSegmentationOutput" | |
| >transformers.models.detr.modeling_detr.DetrSegmentationOutput</a> or a tuple of | |
| <code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various | |
| elements depending on the configuration (<a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.DetrConfig" | |
| >DetrConfig</a>) and inputs.</p> | |
| <ul> | |
| <li><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> are provided) — Total loss as a linear combination of a negative log-likelihood (cross-entropy) for class prediction and a | |
| bounding box loss. The latter is defined as a linear combination of the L1 loss and the generalized | |
| scale-invariant IoU loss.</li> | |
| <li><strong>loss_dict</strong> (<code>Dict</code>, <em>optional</em>) — A dictionary containing the individual losses. Useful for logging.</li> | |
| <li><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, num_classes + 1)</code>) — Classification logits (including no-object) for all queries.</li> | |
| <li><strong>pred_boxes</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, 4)</code>) — Normalized boxes coordinates for all queries, represented as (center_x, center_y, width, height). These | |
| values are normalized in [0, 1], relative to the size of each individual image in the batch (disregarding | |
| possible padding). You can use <a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor.post_process_object_detection" | |
| >post_process_object_detection()</a> to retrieve the | |
| unnormalized bounding boxes.</li> | |
| <li><strong>pred_masks</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, num_queries, height/4, width/4)</code>) — Segmentation masks logits for all queries. See also | |
| <a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor.post_process_semantic_segmentation" | |
| >post_process_semantic_segmentation()</a>, | |
| <a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor.post_process_instance_segmentation" | |
| >post_process_instance_segmentation()</a> or | |
| <a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.DetrImageProcessor.post_process_panoptic_segmentation" | |
| >post_process_panoptic_segmentation()</a> to evaluate semantic, instance and panoptic | |
| segmentation masks respectively.</li> | |
| <li><strong>auxiliary_outputs</strong> (<code>list[Dict]</code>, <em>optional</em>) — Optional, only returned when auxiliary losses are activated (i.e. <code>config.auxiliary_loss</code> is set to <code>True</code>) | |
| and labels are provided. It is a list of dictionaries containing the two above keys (<code>logits</code> and | |
| <code>pred_boxes</code>) for each decoder layer.</li> | |
| <li><strong>last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the decoder of the model.</li> | |
| <li><strong>decoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of | |
| shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the decoder at the output of each | |
| layer plus the initial embedding outputs.</li> | |
| <li><strong>decoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the decoder, after the attention softmax, used to compute the | |
| weighted average in the self-attention heads.</li> | |
| <li><strong>cross_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the decoder’s cross-attention layer, after the attention softmax, | |
| used to compute the weighted average in the cross-attention heads.</li> | |
| <li><strong>encoder_last_hidden_state</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) — Sequence of hidden-states at the output of the last layer of the encoder of the model.</li> | |
| <li><strong>encoder_hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings + one for the output of each layer) of | |
| shape <code>(batch_size, sequence_length, hidden_size)</code>. Hidden-states of the encoder at the output of each | |
| layer plus the initial embedding outputs.</li> | |
| <li><strong>encoder_attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>. Attentions weights of the encoder, after the attention softmax, used to compute the | |
| weighted average in the self-attention heads.</li> | |
| </ul> | |
| `,returnType:`<script context="module">export const metadata = 'undefined';<\/script> | |
| <p><a | |
| href="/docs/transformers/main/en/model_doc/detr#transformers.models.detr.modeling_detr.DetrSegmentationOutput" | |
| >transformers.models.detr.modeling_detr.DetrSegmentationOutput</a> or <code>tuple(torch.FloatTensor)</code></p> | |
| `}}),de=new es({props:{$$slots:{default:[dr]},$$scope:{ctx:z}}}),ce=new on({props:{anchor:"transformers.DetrForSegmentation.forward.example",$$slots:{default:[cr]},$$scope:{ctx:z}}}),ft=new or({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/en/model_doc/detr.md"}}),{c(){c=i("meta"),T=n(),b=i("p"),w=n(),h(y.$$.fragment),l=n(),h(D.$$.fragment),eo=n(),pe=i("p"),pe.innerHTML=ts,to=n(),ge=i("p"),ge.textContent=os,oo=n(),ue=i("p"),ue.innerHTML=ns,no=n(),fe=i("p"),fe.innerHTML=ss,so=n(),h(_e.$$.fragment),ro=n(),be=i("p"),be.innerHTML=rs,ao=n(),we=i("p"),we.innerHTML=as,io=n(),ye=i("p"),ye.innerHTML=is,co=n(),Te=i("p"),Te.innerHTML=ds,lo=n(),ve=i("p"),ve.innerHTML=cs,mo=n(),h(xe.$$.fragment),ho=n(),De=i("ul"),De.innerHTML=ls,po=n(),Me=i("p"),Me.textContent=ms,go=n(),je=i("p"),je.textContent=hs,uo=n(),h($e.$$.fragment),fo=n(),ke=i("p"),ke.textContent=ps,_o=n(),h(ze.$$.fragment),bo=n(),Ce=i("p"),Ce.textContent=gs,wo=n(),h(Fe.$$.fragment),yo=n(),Ie=i("p"),Ie.textContent=us,To=n(),Je=i("table"),Je.innerHTML=fs,vo=n(),qe=i("p"),qe.innerHTML=_s,xo=n(),h(Ne.$$.fragment),Do=n(),Oe=i("p"),Oe.textContent=bs,Mo=n(),h(Ue.$$.fragment),jo=n(),Ee=i("ul"),Ee.innerHTML=ws,$o=n(),Pe=i("p"),Pe.textContent=ys,ko=n(),h(Ze.$$.fragment),zo=n(),C=i("div"),h(Re.$$.fragment),nn=n(),Dt=i("p"),Dt.innerHTML=Ts,sn=n(),Mt=i("p"),Mt.innerHTML=vs,rn=n(),h(B.$$.fragment),an=n(),G=i("div"),h(We.$$.fragment),dn=n(),jt=i("p"),jt.innerHTML=xs,Co=n(),h(Le.$$.fragment),Fo=n(),j=i("div"),h(Se.$$.fragment),cn=n(),$t=i("p"),$t.textContent=Ds,ln=n(),A=i("div"),h(He.$$.fragment),mn=n(),kt=i("p"),kt.textContent=Ms,hn=n(),V=i("div"),h(Be.$$.fragment),pn=n(),zt=i("p"),zt.innerHTML=js,gn=n(),X=i("div"),h(Ge.$$.fragment),un=n(),Ct=i("p"),Ct.innerHTML=$s,fn=n(),Y=i("div"),h(Ae.$$.fragment),_n=n(),Ft=i("p"),Ft.innerHTML=ks,bn=n(),Q=i("div"),h(Ve.$$.fragment),wn=n(),It=i("p"),It.innerHTML=zs,Io=n(),h(Xe.$$.fragment),Jo=n(),$=i("div"),h(Ye.$$.fragment),yn=n(),K=i("div"),h(Qe.$$.fragment),Tn=
n(),Jt=i("p"),Jt.textContent=Cs,vn=n(),ee=i("div"),h(Ke.$$.fragment),xn=n(),qt=i("p"),qt.innerHTML=Fs,Dn=n(),te=i("div"),h(et.$$.fragment),Mn=n(),Nt=i("p"),Nt.innerHTML=Is,jn=n(),oe=i("div"),h(tt.$$.fragment),$n=n(),Ot=i("p"),Ot.innerHTML=Js,kn=n(),ne=i("div"),h(ot.$$.fragment),zn=n(),Ut=i("p"),Ut.innerHTML=qs,qo=n(),h(nt.$$.fragment),No=n(),L=i("div"),h(st.$$.fragment),Cn=n(),Et=i("p"),Et.textContent=Ns,Oo=n(),S=i("div"),h(rt.$$.fragment),Fn=n(),Pt=i("p"),Pt.innerHTML=Os,Uo=n(),H=i("div"),h(at.$$.fragment),In=n(),Zt=i("p"),Zt.innerHTML=Us,Eo=n(),h(it.$$.fragment),Po=n(),F=i("div"),h(dt.$$.fragment),Jn=n(),Rt=i("p"),Rt.textContent=Es,qn=n(),Wt=i("p"),Wt.innerHTML=Ps,Nn=n(),Lt=i("p"),Lt.innerHTML=Zs,On=n(),N=i("div"),h(ct.$$.fragment),Un=n(),St=i("p"),St.innerHTML=Rs,En=n(),h(se.$$.fragment),Pn=n(),h(re.$$.fragment),Zo=n(),h(lt.$$.fragment),Ro=n(),I=i("div"),h(mt.$$.fragment),Zn=n(),Ht=i("p"),Ht.textContent=Ws,Rn=n(),Bt=i("p"),Bt.innerHTML=Ls,Wn=n(),Gt=i("p"),Gt.innerHTML=Ss,Ln=n(),O=i("div"),h(ht.$$.fragment),Sn=n(),At=i("p"),At.innerHTML=Hs,Hn=n(),h(ae.$$.fragment),Bn=n(),h(ie.$$.fragment),Wo=n(),h(pt.$$.fragment),Lo=n(),J=i("div"),h(gt.$$.fragment),Gn=n(),Vt=i("p"),Vt.textContent=Bs,An=n(),Xt=i("p"),Xt.innerHTML=Gs,Vn=n(),Yt=i("p"),Yt.innerHTML=As,Xn=n(),U=i("div"),h(ut.$$.fragment),Yn=n(),Qt=i("p"),Qt.innerHTML=Vs,Qn=n(),h(de.$$.fragment),Kn=n(),h(ce.$$.fragment),So=n(),h(ft.$$.fragment),Ho=n(),Kt=i("p"),this.h()},l(e){const 
o=er("svelte-u9bgzb",document.head);c=d(o,"META",{name:!0,content:!0}),o.forEach(t),T=s(e),b=d(e,"P",{}),v(b).forEach(t),w=s(e),p(y.$$.fragment,e),l=s(e),p(D.$$.fragment,e),eo=s(e),pe=d(e,"P",{"data-svelte-h":!0}),m(pe)!=="svelte-1619prt"&&(pe.innerHTML=ts),to=s(e),ge=d(e,"P",{"data-svelte-h":!0}),m(ge)!=="svelte-vfdo9a"&&(ge.textContent=os),oo=s(e),ue=d(e,"P",{"data-svelte-h":!0}),m(ue)!=="svelte-s87elr"&&(ue.innerHTML=ns),no=s(e),fe=d(e,"P",{"data-svelte-h":!0}),m(fe)!=="svelte-1gw5wuz"&&(fe.innerHTML=ss),so=s(e),p(_e.$$.fragment,e),ro=s(e),be=d(e,"P",{"data-svelte-h":!0}),m(be)!=="svelte-101xdgk"&&(be.innerHTML=rs),ao=s(e),we=d(e,"P",{"data-svelte-h":!0}),m(we)!=="svelte-x01cd0"&&(we.innerHTML=as),io=s(e),ye=d(e,"P",{"data-svelte-h":!0}),m(ye)!=="svelte-i2avyi"&&(ye.innerHTML=is),co=s(e),Te=d(e,"P",{"data-svelte-h":!0}),m(Te)!=="svelte-351ubi"&&(Te.innerHTML=ds),lo=s(e),ve=d(e,"P",{"data-svelte-h":!0}),m(ve)!=="svelte-7chpgp"&&(ve.innerHTML=cs),mo=s(e),p(xe.$$.fragment,e),ho=s(e),De=d(e,"UL",{"data-svelte-h":!0}),m(De)!=="svelte-uuboem"&&(De.innerHTML=ls),po=s(e),Me=d(e,"P",{"data-svelte-h":!0}),m(Me)!=="svelte-5tif3l"&&(Me.textContent=ms),go=s(e),je=d(e,"P",{"data-svelte-h":!0}),m(je)!=="svelte-ixg096"&&(je.textContent=hs),uo=s(e),p($e.$$.fragment,e),fo=s(e),ke=d(e,"P",{"data-svelte-h":!0}),m(ke)!=="svelte-14rhv4g"&&(ke.textContent=ps),_o=s(e),p(ze.$$.fragment,e),bo=s(e),Ce=d(e,"P",{"data-svelte-h":!0}),m(Ce)!=="svelte-1hfzzjq"&&(Ce.textContent=gs),wo=s(e),p(Fe.$$.fragment,e),yo=s(e),Ie=d(e,"P",{"data-svelte-h":!0}),m(Ie)!=="svelte-1e3p89m"&&(Ie.textContent=us),To=s(e),Je=d(e,"TABLE",{"data-svelte-h":!0}),m(Je)!=="svelte-1nhtapy"&&(Je.innerHTML=fs),vo=s(e),qe=d(e,"P",{"data-svelte-h":!0}),m(qe)!=="svelte-vv36hf"&&(qe.innerHTML=_s),xo=s(e),p(Ne.$$.fragment,e),Do=s(e),Oe=d(e,"P",{"data-svelte-h":!0}),m(Oe)!=="svelte-5jc6k6"&&(Oe.textContent=bs),Mo=s(e),p(Ue.$$.fragment,e),jo=s(e),Ee=d(e,"UL",{"data-svelte-h":!0}),m(Ee)!=="svelte-1q9o18i"&&(Ee.innerHTML=ws),$o=s(e)
,Pe=d(e,"P",{"data-svelte-h":!0}),m(Pe)!=="svelte-1xesile"&&(Pe.textContent=ys),ko=s(e),p(Ze.$$.fragment,e),zo=s(e),C=d(e,"DIV",{class:!0});var q=v(C);p(Re.$$.fragment,q),nn=s(q),Dt=d(q,"P",{"data-svelte-h":!0}),m(Dt)!=="svelte-1nmvrme"&&(Dt.innerHTML=Ts),sn=s(q),Mt=d(q,"P",{"data-svelte-h":!0}),m(Mt)!=="svelte-o55m63"&&(Mt.innerHTML=vs),rn=s(q),p(B.$$.fragment,q),an=s(q),G=d(q,"DIV",{class:!0});var _t=v(G);p(We.$$.fragment,_t),dn=s(_t),jt=d(_t,"P",{"data-svelte-h":!0}),m(jt)!=="svelte-ko8kbn"&&(jt.innerHTML=xs),_t.forEach(t),q.forEach(t),Co=s(e),p(Le.$$.fragment,e),Fo=s(e),j=d(e,"DIV",{class:!0});var k=v(j);p(Se.$$.fragment,k),cn=s(k),$t=d(k,"P",{"data-svelte-h":!0}),m($t)!=="svelte-19j0nu1"&&($t.textContent=Ds),ln=s(k),A=d(k,"DIV",{class:!0});var bt=v(A);p(He.$$.fragment,bt),mn=s(bt),kt=d(bt,"P",{"data-svelte-h":!0}),m(kt)!=="svelte-jgz2ra"&&(kt.textContent=Ms),bt.forEach(t),hn=s(k),V=d(k,"DIV",{class:!0});var wt=v(V);p(Be.$$.fragment,wt),pn=s(wt),zt=d(wt,"P",{"data-svelte-h":!0}),m(zt)!=="svelte-1w2swp4"&&(zt.innerHTML=js),wt.forEach(t),gn=s(k),X=d(k,"DIV",{class:!0});var yt=v(X);p(Ge.$$.fragment,yt),un=s(yt),Ct=d(yt,"P",{"data-svelte-h":!0}),m(Ct)!=="svelte-18jh4db"&&(Ct.innerHTML=$s),yt.forEach(t),fn=s(k),Y=d(k,"DIV",{class:!0});var Tt=v(Y);p(Ae.$$.fragment,Tt),_n=s(Tt),Ft=d(Tt,"P",{"data-svelte-h":!0}),m(Ft)!=="svelte-1u1laof"&&(Ft.innerHTML=ks),Tt.forEach(t),bn=s(k),Q=d(k,"DIV",{class:!0});var Go=v(Q);p(Ve.$$.fragment,Go),wn=s(Go),It=d(Go,"P",{"data-svelte-h":!0}),m(It)!=="svelte-192g4pt"&&(It.innerHTML=zs),Go.forEach(t),k.forEach(t),Io=s(e),p(Xe.$$.fragment,e),Jo=s(e),$=d(e,"DIV",{class:!0});var E=v($);p(Ye.$$.fragment,E),yn=s(E),K=d(E,"DIV",{class:!0});var Ao=v(K);p(Qe.$$.fragment,Ao),Tn=s(Ao),Jt=d(Ao,"P",{"data-svelte-h":!0}),m(Jt)!=="svelte-khengj"&&(Jt.textContent=Cs),Ao.forEach(t),vn=s(E),ee=d(E,"DIV",{class:!0});var 
Vo=v(ee);p(Ke.$$.fragment,Vo),xn=s(Vo),qt=d(Vo,"P",{"data-svelte-h":!0}),m(qt)!=="svelte-1w2swp4"&&(qt.innerHTML=Fs),Vo.forEach(t),Dn=s(E),te=d(E,"DIV",{class:!0});var Xo=v(te);p(et.$$.fragment,Xo),Mn=s(Xo),Nt=d(Xo,"P",{"data-svelte-h":!0}),m(Nt)!=="svelte-18jh4db"&&(Nt.innerHTML=Is),Xo.forEach(t),jn=s(E),oe=d(E,"DIV",{class:!0});var Yo=v(oe);p(tt.$$.fragment,Yo),$n=s(Yo),Ot=d(Yo,"P",{"data-svelte-h":!0}),m(Ot)!=="svelte-1u1laof"&&(Ot.innerHTML=Js),Yo.forEach(t),kn=s(E),ne=d(E,"DIV",{class:!0});var Qo=v(ne);p(ot.$$.fragment,Qo),zn=s(Qo),Ut=d(Qo,"P",{"data-svelte-h":!0}),m(Ut)!=="svelte-192g4pt"&&(Ut.innerHTML=qs),Qo.forEach(t),E.forEach(t),qo=s(e),p(nt.$$.fragment,e),No=s(e),L=d(e,"DIV",{class:!0});var Ko=v(L);p(st.$$.fragment,Ko),Cn=s(Ko),Et=d(Ko,"P",{"data-svelte-h":!0}),m(Et)!=="svelte-1ya2yj5"&&(Et.textContent=Ns),Ko.forEach(t),Oo=s(e),S=d(e,"DIV",{class:!0});var en=v(S);p(rt.$$.fragment,en),Fn=s(en),Pt=d(en,"P",{"data-svelte-h":!0}),m(Pt)!=="svelte-1pxfrhm"&&(Pt.innerHTML=Os),en.forEach(t),Uo=s(e),H=d(e,"DIV",{class:!0});var tn=v(H);p(at.$$.fragment,tn),In=s(tn),Zt=d(tn,"P",{"data-svelte-h":!0}),m(Zt)!=="svelte-1nd4e56"&&(Zt.innerHTML=Us),tn.forEach(t),Eo=s(e),p(it.$$.fragment,e),Po=s(e),F=d(e,"DIV",{class:!0});var Z=v(F);p(dt.$$.fragment,Z),Jn=s(Z),Rt=d(Z,"P",{"data-svelte-h":!0}),m(Rt)!=="svelte-esnh0n"&&(Rt.textContent=Es),qn=s(Z),Wt=d(Z,"P",{"data-svelte-h":!0}),m(Wt)!=="svelte-6pahdo"&&(Wt.innerHTML=Ps),Nn=s(Z),Lt=d(Z,"P",{"data-svelte-h":!0}),m(Lt)!=="svelte-hswkmf"&&(Lt.innerHTML=Zs),On=s(Z),N=d(Z,"DIV",{class:!0});var le=v(N);p(ct.$$.fragment,le),Un=s(le),St=d(le,"P",{"data-svelte-h":!0}),m(St)!=="svelte-1wwznng"&&(St.innerHTML=Rs),En=s(le),p(se.$$.fragment,le),Pn=s(le),p(re.$$.fragment,le),le.forEach(t),Z.forEach(t),Zo=s(e),p(lt.$$.fragment,e),Ro=s(e),I=d(e,"DIV",{class:!0});var 
R=v(I);p(mt.$$.fragment,R),Zn=s(R),Ht=d(R,"P",{"data-svelte-h":!0}),m(Ht)!=="svelte-dw6bi4"&&(Ht.textContent=Ws),Rn=s(R),Bt=d(R,"P",{"data-svelte-h":!0}),m(Bt)!=="svelte-6pahdo"&&(Bt.innerHTML=Ls),Wn=s(R),Gt=d(R,"P",{"data-svelte-h":!0}),m(Gt)!=="svelte-hswkmf"&&(Gt.innerHTML=Ss),Ln=s(R),O=d(R,"DIV",{class:!0});var me=v(O);p(ht.$$.fragment,me),Sn=s(me),At=d(me,"P",{"data-svelte-h":!0}),m(At)!=="svelte-djwew4"&&(At.innerHTML=Hs),Hn=s(me),p(ae.$$.fragment,me),Bn=s(me),p(ie.$$.fragment,me),me.forEach(t),R.forEach(t),Wo=s(e),p(pt.$$.fragment,e),Lo=s(e),J=d(e,"DIV",{class:!0});var W=v(J);p(gt.$$.fragment,W),Gn=s(W),Vt=d(W,"P",{"data-svelte-h":!0}),m(Vt)!=="svelte-1yivh9f"&&(Vt.textContent=Bs),An=s(W),Xt=d(W,"P",{"data-svelte-h":!0}),m(Xt)!=="svelte-6pahdo"&&(Xt.innerHTML=Gs),Vn=s(W),Yt=d(W,"P",{"data-svelte-h":!0}),m(Yt)!=="svelte-hswkmf"&&(Yt.innerHTML=As),Xn=s(W),U=d(W,"DIV",{class:!0});var he=v(U);p(ut.$$.fragment,he),Yn=s(he),Qt=d(he,"P",{"data-svelte-h":!0}),m(Qt)!=="svelte-7twj3g"&&(Qt.innerHTML=Vs),Qn=s(he),p(de.$$.fragment,he),Kn=s(he),p(ce.$$.fragment,he),he.forEach(t),W.forEach(t),So=s(e),p(ft.$$.fragment,e),Ho=s(e),Kt=d(e,"P",{}),v(Kt).forEach(t),this.h()},h(){x(c,"name","hf:doc:metadata"),x(c,"content",mr),x(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(C,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(A,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(V,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(X,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(Y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(Q,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 
rounded-tl-xl mb-6 mt-8"),x(K,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(ee,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(te,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(oe,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(ne,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x($,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(L,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(H,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(F,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(O,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(I,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8")},m(e,o){r(document.head,c),a(e,T,o),a(e,b,o),a(e,w,o),g(y,e,o),a(e,l,o),g(D,e,o),a(e,eo,o),a(e,pe,o),a(e,to,o),a(e,ge,o),a(e,oo,o),a(e,ue,o),a(e,no,o),a(e,fe,o),a(e,so,o),g(_e,e,o),a(e,ro,o),a(e,be,o),a(e,ao,o),a(e,we,o),a(e,io,o),a(e,ye,o),a(e,co,o),a(e,Te,o),a(e,lo,o),a(e,ve,o),a(e,mo,o),g(xe,e,o),a(e,ho,o),a(e,De,o),a(e,po,o),a(e,Me,o),a(e,go,o),a(e,je,o),a(e,uo,o),g($e,e,o),a(e,fo,o),a(e,ke,o),a(e,_o,o),g(ze,e,o),a(e,bo,o),a(e,Ce,o),a(e,wo,o),g(Fe,e,o),a(e,yo,o),a(e,Ie,o),a(e,To,o),a(e,Je,o),a(e,vo,o),a(e,qe,o),a(e,xo,o),g(Ne,e,o),a(e,Do,o),a(e,Oe,o),a(e,Mo,o),g(Ue,e,o),a(e,jo,o),a(e,Ee,o),a(e,$o,o),a(e,Pe,o),a(e,ko,o),g(Ze,e,o),a(e,zo,o),a(e,C,o),g(Re,C,null),r(C,nn),r(C,Dt),r(C,sn),r(C,Mt),r(C,rn),g(B,C,null),r(C,an),r(C,G),g(We,G,null),r(G,dn),r(G,jt),a(e,Co,o),g(Le,e,o),a(e,Fo,o),a(e,j,o),g(Se,j,null),r(j,cn),r(j,$t),r(j,ln),r(j,A),g(He,A,null),r(A,mn),r(A,kt),r(j,hn),r(j,V),g(Be,V,null),r(V,pn),r(V,zt),r(j,gn),r(j,X),g(Ge,X,null),r(X,un),r(X,Ct),r(j,fn),r(j,Y),g(Ae,Y,null),r(Y,_n),r(Y,Ft),r(j,bn),r(j,Q),g(Ve,Q,null),r(Q,wn),r(Q,It),a(e,Io,o),g(Xe,e,o),a(e,Jo,o),a(e,$,o),g(Ye,$,null),r($,yn),r($,K),g(Qe,K,null),r(K,Tn),r(K,Jt),r($,vn),r($,ee),g(Ke,ee,null),r(ee,xn),r(ee,qt),r($,Dn),r($,te),g(et,te,null),r(te,Mn),r(te,Nt),r($,jn),r($,oe),g(tt,oe,null),r(oe,$n),r(oe,Ot),r($,kn),r($,ne),g(ot,ne,null),r(ne,zn),r(ne,Ut),a(e,qo,o),g(nt,e,o),a(e,No,o),a(e,L,o),g(st,L,null),r(L,Cn),r(L,Et),a(e,Oo,o),a(e,S,o),g(rt,S,null),r(S,Fn),r(S,Pt),a(e,Uo,o),a(e,H,o),g(at,H,null),r(H,In),r(H,Zt),a(e,Eo,o),g(it,e,o),a(e,Po,o),a(e,F,o),g(dt,F,null),r(F,Jn),r(F,Rt),r(F,qn),r(F,Wt),r(F,Nn),r(F,Lt),r(F,On),r(F,N),g(ct,N,null),r(N,Un),r(N,St),r(N,En),g(se,N,null),r(N,Pn),g(re,N,null),a(e,Zo,o),g(lt,e,o),a(e,Ro,o),a(e,I,o),g(mt,I,null),r(I,Zn),r(I,Ht),r(I,Rn),r(I,Bt),r(I,Wn),r(I,Gt),r(I,Ln),r(I,O),g(ht,O,null),r(O,Sn),r(O,At),r(O,Hn),g(ae,O,null),r(O,Bn),g(ie,O,null),a(e,Wo,o),g(pt,e,o),a(e,Lo,o),a(e,J,o),g(gt,J,null),r(J,Gn),r(J,Vt),r(J,An),r(J,Xt),r(J,Vn),r(J,Yt),r(J,Xn),r(J,
U),g(ut,U,null),r(U,Yn),r(U,Qt),r(U,Qn),g(de,U,null),r(U,Kn),g(ce,U,null),a(e,So,o),g(ft,e,o),a(e,Ho,o),a(e,Kt,o),Bo=!0},p(e,[o]){const q={};o&2&&(q.$$scope={dirty:o,ctx:e}),B.$set(q);const _t={};o&2&&(_t.$$scope={dirty:o,ctx:e}),se.$set(_t);const k={};o&2&&(k.$$scope={dirty:o,ctx:e}),re.$set(k);const bt={};o&2&&(bt.$$scope={dirty:o,ctx:e}),ae.$set(bt);const wt={};o&2&&(wt.$$scope={dirty:o,ctx:e}),ie.$set(wt);const yt={};o&2&&(yt.$$scope={dirty:o,ctx:e}),de.$set(yt);const Tt={};o&2&&(Tt.$$scope={dirty:o,ctx:e}),ce.$set(Tt)},i(e){Bo||(u(y.$$.fragment,e),u(D.$$.fragment,e),u(_e.$$.fragment,e),u(xe.$$.fragment,e),u($e.$$.fragment,e),u(ze.$$.fragment,e),u(Fe.$$.fragment,e),u(Ne.$$.fragment,e),u(Ue.$$.fragment,e),u(Ze.$$.fragment,e),u(Re.$$.fragment,e),u(B.$$.fragment,e),u(We.$$.fragment,e),u(Le.$$.fragment,e),u(Se.$$.fragment,e),u(He.$$.fragment,e),u(Be.$$.fragment,e),u(Ge.$$.fragment,e),u(Ae.$$.fragment,e),u(Ve.$$.fragment,e),u(Xe.$$.fragment,e),u(Ye.$$.fragment,e),u(Qe.$$.fragment,e),u(Ke.$$.fragment,e),u(et.$$.fragment,e),u(tt.$$.fragment,e),u(ot.$$.fragment,e),u(nt.$$.fragment,e),u(st.$$.fragment,e),u(rt.$$.fragment,e),u(at.$$.fragment,e),u(it.$$.fragment,e),u(dt.$$.fragment,e),u(ct.$$.fragment,e),u(se.$$.fragment,e),u(re.$$.fragment,e),u(lt.$$.fragment,e),u(mt.$$.fragment,e),u(ht.$$.fragment,e),u(ae.$$.fragment,e),u(ie.$$.fragment,e),u(pt.$$.fragment,e),u(gt.$$.fragment,e),u(ut.$$.fragment,e),u(de.$$.fragment,e),u(ce.$$.fragment,e),u(ft.$$.fragment,e),Bo=!0)},o(e){f(y.$$.fragment,e),f(D.$$.fragment,e),f(_e.$$.fragment,e),f(xe.$$.fragment,e),f($e.$$.fragment,e),f(ze.$$.fragment,e),f(Fe.$$.fragment,e),f(Ne.$$.fragment,e),f(Ue.$$.fragment,e),f(Ze.$$.fragment,e),f(Re.$$.fragment,e),f(B.$$.fragment,e),f(We.$$.fragment,e),f(Le.$$.fragment,e),f(Se.$$.fragment,e),f(He.$$.fragment,e),f(Be.$$.fragment,e),f(Ge.$$.fragment,e),f(Ae.$$.fragment,e),f(Ve.$$.fragment,e),f(Xe.$$.fragment,e),f(Ye.$$.fragment,e),f(Qe.$$.fragment,e),f(Ke.$$.fragment,e),f(et.$$.fragment,e),f(tt.$$.fragm
ent,e),f(ot.$$.fragment,e),f(nt.$$.fragment,e),f(st.$$.fragment,e),f(rt.$$.fragment,e),f(at.$$.fragment,e),f(it.$$.fragment,e),f(dt.$$.fragment,e),f(ct.$$.fragment,e),f(se.$$.fragment,e),f(re.$$.fragment,e),f(lt.$$.fragment,e),f(mt.$$.fragment,e),f(ht.$$.fragment,e),f(ae.$$.fragment,e),f(ie.$$.fragment,e),f(pt.$$.fragment,e),f(gt.$$.fragment,e),f(ut.$$.fragment,e),f(de.$$.fragment,e),f(ce.$$.fragment,e),f(ft.$$.fragment,e),Bo=!1},d(e){e&&(t(T),t(b),t(w),t(l),t(eo),t(pe),t(to),t(ge),t(oo),t(ue),t(no),t(fe),t(so),t(ro),t(be),t(ao),t(we),t(io),t(ye),t(co),t(Te),t(lo),t(ve),t(mo),t(ho),t(De),t(po),t(Me),t(go),t(je),t(uo),t(fo),t(ke),t(_o),t(bo),t(Ce),t(wo),t(yo),t(Ie),t(To),t(Je),t(vo),t(qe),t(xo),t(Do),t(Oe),t(Mo),t(jo),t(Ee),t($o),t(Pe),t(ko),t(zo),t(C),t(Co),t(Fo),t(j),t(Io),t(Jo),t($),t(qo),t(No),t(L),t(Oo),t(S),t(Uo),t(H),t(Eo),t(Po),t(F),t(Zo),t(Ro),t(I),t(Wo),t(Lo),t(J),t(So),t(Ho),t(Kt)),t(c),_(y,e),_(D,e),_(_e,e),_(xe,e),_($e,e),_(ze,e),_(Fe,e),_(Ne,e),_(Ue,e),_(Ze,e),_(Re),_(B),_(We),_(Le,e),_(Se),_(He),_(Be),_(Ge),_(Ae),_(Ve),_(Xe,e),_(Ye),_(Qe),_(Ke),_(et),_(tt),_(ot),_(nt,e),_(st),_(rt),_(at),_(it,e),_(dt),_(ct),_(se),_(re),_(lt,e),_(mt),_(ht),_(ae),_(ie),_(pt,e),_(gt),_(ut),_(de),_(ce),_(ft,e)}}}const mr='{"title":"DETR","local":"detr","sections":[{"title":"Overview","local":"overview","sections":[],"depth":2},{"title":"How DETR works","local":"how-detr-works","sections":[],"depth":2},{"title":"Usage tips","local":"usage-tips","sections":[],"depth":2},{"title":"Resources","local":"resources","sections":[],"depth":2},{"title":"DetrConfig","local":"transformers.DetrConfig","sections":[],"depth":2},{"title":"DetrImageProcessor","local":"transformers.DetrImageProcessor","sections":[],"depth":2},{"title":"DetrFeatureExtractor","local":"transformers.DetrFeatureExtractor","sections":[],"depth":2},{"title":"DETR specific 
outputs","local":"transformers.models.detr.modeling_detr.DetrModelOutput","sections":[],"depth":2},{"title":"DetrModel","local":"transformers.DetrModel","sections":[],"depth":2},{"title":"DetrForObjectDetection","local":"transformers.DetrForObjectDetection","sections":[],"depth":2},{"title":"DetrForSegmentation","local":"transformers.DetrForSegmentation","sections":[],"depth":2}],"depth":1}';function hr(z){return Ys(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Tr extends Qs{constructor(c){super(),Ks(this,c,hr,lr,Xs,{})}}export{Tr as component}; | |
Xet Storage Details
- Size:
- 170 kB
- Xet hash:
- e6938828d2f46d1a3e0876d6c9485cb95de9e754240be401056923df4734f0ed
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.