Buckets:

hf-doc-build/doc-dev / transformers /main /zh /perf_torch_compile.html
rtrm's picture
download
raw
67.7 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;使用 torch.compile() 优化推理&quot;,&quot;local&quot;:&quot;使用-torchcompile-优化推理&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;torch.compile 的优势&quot;,&quot;local&quot;:&quot;torchcompile-的优势&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;基准测试代码&quot;,&quot;local&quot;:&quot;基准测试代码&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;使用 ViT 进行图像分类&quot;,&quot;local&quot;:&quot;使用-vit-进行图像分类&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;使用 DETR 进行目标检测&quot;,&quot;local&quot;:&quot;使用-detr-进行目标检测&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;使用 Segformer 进行图像分割&quot;,&quot;local&quot;:&quot;使用-segformer-进行图像分割&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;A100 (batch size: 1)&quot;,&quot;local&quot;:&quot;a100-batch-size-1&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;A100 (batch size: 4)&quot;,&quot;local&quot;:&quot;a100-batch-size-4&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;A100 (batch size: 16)&quot;,&quot;local&quot;:&quot;a100-batch-size-16&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;V100 (batch size: 1)&quot;,&quot;local&quot;:&quot;v100-batch-size-1&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;V100 (batch size: 4)&quot;,&quot;local&quot;:&quot;v100-batch-size-4&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;V100 (batch size: 16)&quot;,&quot;local&quot;:&quot;v100-batch-size-16&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T4 (batch size: 1)&quot;,&quot;local&quot;:&quot;t4-batch-size-1&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T4 (batch size: 4)&quot;,&quot;local&quot;:&quot;t4-batch-size-4&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T4 (batch size: 16)&quot;,&quot;local&quot;:&quot;t4-batch-size-16&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;PyTorch Nightly&quot;,&quot;local&quot;:&quot;pytorch-nightly&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;A100&quot;,&quot;local&quot;:&quot;a100&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T4&quot;,&quot;local&quot;:&quot;t4&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;V100&quot;,&quot;local&quot;:&quot;v100&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;降低开销&quot;,&quot;local&quot;:&quot;降低开销&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;A100&quot;,&quot;local&quot;:&quot;a100&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T4&quot;,&quot;local&quot;:&quot;t4&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/main/zh/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/entry/start.a61b9c50.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/scheduler.9991993c.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/singletons.2822fe91.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/index.02cfeb18.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/paths.d66588b4.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/entry/app.99775688.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/index.7fc9a5e7.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/nodes/0.f4c5a5c1.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/nodes/49.06d32268.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/CodeBlock.e11cba92.js">
<link rel="modulepreload" href="/docs/transformers/main/zh/_app/immutable/chunks/EditOnGithub.84ab7f0e.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;使用 torch.compile() 优化推理&quot;,&quot;local&quot;:&quot;使用-torchcompile-优化推理&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;torch.compile 的优势&quot;,&quot;local&quot;:&quot;torchcompile-的优势&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;基准测试代码&quot;,&quot;local&quot;:&quot;基准测试代码&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;使用 ViT 进行图像分类&quot;,&quot;local&quot;:&quot;使用-vit-进行图像分类&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;使用 DETR 进行目标检测&quot;,&quot;local&quot;:&quot;使用-detr-进行目标检测&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4},{&quot;title&quot;:&quot;使用 Segformer 进行图像分割&quot;,&quot;local&quot;:&quot;使用-segformer-进行图像分割&quot;,&quot;sections&quot;:[],&quot;depth&quot;:4}],&quot;depth&quot;:3},{&quot;title&quot;:&quot;A100 (batch size: 1)&quot;,&quot;local&quot;:&quot;a100-batch-size-1&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;A100 (batch size: 4)&quot;,&quot;local&quot;:&quot;a100-batch-size-4&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;A100 (batch size: 16)&quot;,&quot;local&quot;:&quot;a100-batch-size-16&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;V100 (batch size: 1)&quot;,&quot;local&quot;:&quot;v100-batch-size-1&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;V100 (batch size: 4)&quot;,&quot;local&quot;:&quot;v100-batch-size-4&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;V100 (batch size: 16)&quot;,&quot;local&quot;:&quot;v100-batch-size-16&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T4 (batch size: 1)&quot;,&quot;local&quot;:&quot;t4-batch-size-1&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T4 (batch size: 4)&quot;,&quot;local&quot;:&quot;t4-batch-size-4&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T4 (batch size: 16)&quot;,&quot;local&quot;:&quot;t4-batch-size-16&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;PyTorch Nightly&quot;,&quot;local&quot;:&quot;pytorch-nightly&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;A100&quot;,&quot;local&quot;:&quot;a100&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T4&quot;,&quot;local&quot;:&quot;t4&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;V100&quot;,&quot;local&quot;:&quot;v100&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;降低开销&quot;,&quot;local&quot;:&quot;降低开销&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;A100&quot;,&quot;local&quot;:&quot;a100&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;T4&quot;,&quot;local&quot;:&quot;t4&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="使用-torchcompile-优化推理" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#使用-torchcompile-优化推理"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>使用 torch.compile() 优化推理</span></h1> <p data-svelte-h="svelte-13rra8p">本指南旨在为使用<a href="https://pytorch.org/tutorials/intermediate/torch_compile_tutorial.html" rel="nofollow"><code>torch.compile()</code></a><a href="https://huggingface.co/models?pipeline_tag=image-classification&library=transformers&sort=trending" rel="nofollow">🤗 Transformers中的计算机视觉模型</a>中引入的推理速度提升提供一个基准。</p> <h2 class="relative group"><a id="torchcompile-的优势" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#torchcompile-的优势"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>torch.compile 的优势</span></h2> <p data-svelte-h="svelte-1v035c2">根据模型和GPU的不同,<code>torch.compile()</code>在推理过程中可以提高多达30%的速度。要使用<code>torch.compile()</code>,只需安装2.0及以上版本的<code>torch</code>即可。</p> <p data-svelte-h="svelte-69z0gw">编译模型需要时间,因此如果您只需要编译一次模型而不是每次推理都编译,那么它非常有用。
要编译您选择的任何计算机视觉模型,请按照以下方式调用<code>torch.compile()</code></p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->from transformers import AutoModelForImageClassification
model = AutoModelForImageClassification.from_pretrained(MODEL_ID).to(&quot;cuda&quot;)
<span class="hljs-addition">+ model = torch.compile(model)</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ady6mu"><code>compile()</code> 提供了多种编译模式,它们在编译时间和推理开销上有所不同。<code>max-autotune</code><code>reduce-overhead</code> 需要更长的时间,但会得到更快的推理速度。默认模式在编译时最快,但在推理时间上与 <code>reduce-overhead</code> 相比效率较低。在本指南中,我们使用了默认模式。您可以在<a href="https://pytorch.org/get-started/pytorch-2.0/#user-experience" rel="nofollow">这里</a>了解更多信息。</p> <p data-svelte-h="svelte-cqb370">我们在 PyTorch 2.0.1 版本上使用不同的计算机视觉模型、任务、硬件类型和数据批量大小对 <code>torch.compile</code> 进行了基准测试。</p> <h2 class="relative group"><a id="基准测试代码" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#基准测试代码"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>基准测试代码</span></h2> <p data-svelte-h="svelte-1jvd763">以下是每个任务的基准测试代码。我们在推理之前”预热“GPU,并取300次推理的平均值,每次使用相同的图像。</p> <h3 class="relative group"><a id="使用-vit-进行图像分类" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#使用-vit-进行图像分类"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>使用 ViT 进行图像分类</span></h3> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
<span class="hljs-keyword">import</span> requests
<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoImageProcessor, AutoModelForImageClassification
url = <span class="hljs-string">&#x27;http://images.cocodataset.org/val2017/000000039769.jpg&#x27;</span>
image = Image.<span class="hljs-built_in">open</span>(requests.get(url, stream=<span class="hljs-literal">True</span>).raw)
processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">&quot;google/vit-base-patch16-224&quot;</span>)
model = AutoModelForImageClassification.from_pretrained(<span class="hljs-string">&quot;google/vit-base-patch16-224&quot;</span>).to(<span class="hljs-string">&quot;cuda&quot;</span>)
model = torch.<span class="hljs-built_in">compile</span>(model)
processed_input = processor(image, return_tensors=<span class="hljs-string">&#x27;pt&#x27;</span>).to(device=<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-keyword">with</span> torch.no_grad():
_ = model(**processed_input)
<!-- HTML_TAG_END --></pre></div> <h4 class="relative group"><a id="使用-detr-进行目标检测" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#使用-detr-进行目标检测"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>使用 DETR 进行目标检测</span></h4> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoImageProcessor, AutoModelForObjectDetection
processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">&quot;facebook/detr-resnet-50&quot;</span>)
model = AutoModelForObjectDetection.from_pretrained(<span class="hljs-string">&quot;facebook/detr-resnet-50&quot;</span>).to(<span class="hljs-string">&quot;cuda&quot;</span>)
model = torch.<span class="hljs-built_in">compile</span>(model)
texts = [<span class="hljs-string">&quot;a photo of a cat&quot;</span>, <span class="hljs-string">&quot;a photo of a dog&quot;</span>]
inputs = processor(text=texts, images=image, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-keyword">with</span> torch.no_grad():
_ = model(**inputs)<!-- HTML_TAG_END --></pre></div> <h4 class="relative group"><a id="使用-segformer-进行图像分割" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#使用-segformer-进行图像分割"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>使用 Segformer 进行图像分割</span></h4> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> SegformerImageProcessor, SegformerForSemanticSegmentation
processor = SegformerImageProcessor.from_pretrained(<span class="hljs-string">&quot;nvidia/segformer-b0-finetuned-ade-512-512&quot;</span>)
model = SegformerForSemanticSegmentation.from_pretrained(<span class="hljs-string">&quot;nvidia/segformer-b0-finetuned-ade-512-512&quot;</span>).to(<span class="hljs-string">&quot;cuda&quot;</span>)
model = torch.<span class="hljs-built_in">compile</span>(model)
seg_inputs = processor(images=image, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>).to(<span class="hljs-string">&quot;cuda&quot;</span>)
<span class="hljs-keyword">with</span> torch.no_grad():
_ = model(**seg_inputs)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12xtxdc">以下是我们进行基准测试的模型列表。</p> <p data-svelte-h="svelte-1ugcf6x"><strong>图像分类</strong></p> <ul data-svelte-h="svelte-i1xpay"><li><a href="https://huggingface.co/google/vit-base-patch16-224" rel="nofollow">google/vit-base-patch16-224</a></li> <li><a href="https://huggingface.co/microsoft/beit-base-patch16-224-pt22k-ft22k" rel="nofollow">microsoft/beit-base-patch16-224-pt22k-ft22k</a></li> <li><a href="https://huggingface.co/facebook/convnext-large-224" rel="nofollow">facebook/convnext-large-224</a></li> <li><a href="https://huggingface.co/" rel="nofollow">microsoft/resnet-50</a></li></ul> <p data-svelte-h="svelte-1fqqj48"><strong>图像分割</strong></p> <ul data-svelte-h="svelte-1vcuz7e"><li><a href="https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512" rel="nofollow">nvidia/segformer-b0-finetuned-ade-512-512</a></li> <li><a href="https://huggingface.co/facebook/mask2former-swin-tiny-coco-panoptic" rel="nofollow">facebook/mask2former-swin-tiny-coco-panoptic</a></li> <li><a href="https://huggingface.co/facebook/maskformer-swin-base-ade" rel="nofollow">facebook/maskformer-swin-base-ade</a></li> <li><a href="https://huggingface.co/google/deeplabv3_mobilenet_v2_1.0_513" rel="nofollow">google/deeplabv3_mobilenet_v2_1.0_513</a></li></ul> <p data-svelte-h="svelte-1xfnhbz"><strong>目标检测</strong></p> <ul data-svelte-h="svelte-rn46wh"><li><p><a href="https://huggingface.co/google/owlvit-base-patch32" rel="nofollow">google/owlvit-base-patch32</a></p></li> <li><p><a href="https://huggingface.co/facebook/detr-resnet-101" rel="nofollow">facebook/detr-resnet-101</a></p></li> <li><p><a href="https://huggingface.co/microsoft/conditional-detr-resnet-50" rel="nofollow">microsoft/conditional-detr-resnet-50</a></p> <p>下面是使用和不使用<code>torch.compile()</code>的推理持续时间可视化,以及每个模型在不同硬件和数据批量大小下的改进百分比。</p></li></ul> <div class="flex" data-svelte-h="svelte-1jw9wmi"><div><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/torch_compile/a100_batch_comp.png"></div> <div><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/torch_compile/v100_batch_comp.png"></div> <div><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/torch_compile/t4_batch_comp.png"></div></div> <div class="flex" data-svelte-h="svelte-nlzsqo"><div><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/torch_compile/A100_1_duration.png"></div> <div><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/torch_compile/A100_1_percentage.png"></div></div> <p data-svelte-h="svelte-gdeipd"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/torch_compile/v100_1_duration.png" alt="Duration Comparison on V100 with Batch Size of 1"></p> <p data-svelte-h="svelte-1cusdpa"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/torch_compile/T4_4_percentage.png" alt="Percentage Improvement on T4 with Batch Size of 4"></p> <p data-svelte-h="svelte-1ju7fpm">下面可以找到每个模型使用和不使用<code>compile()</code>的推理时间(毫秒)。请注意,OwlViT在大批量大小下会导致内存溢出。</p> <h3 class="relative group"><a id="a100-batch-size-1" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#a100-batch-size-1"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>A100 (batch size: 1)</span></h3> <table data-svelte-h="svelte-6uvhqg"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>torch 2.0 - <br>no compile</strong></th> <th align="center"><strong>torch 2.0 - <br>compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/ViT</td> <td align="center">9.325</td> <td align="center">7.584</td></tr> <tr><td align="center">Image Segmentation/Segformer</td> <td align="center">11.759</td> <td align="center">10.500</td></tr> <tr><td align="center">Object Detection/OwlViT</td> <td align="center">24.978</td> <td align="center">18.420</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">11.282</td> <td align="center">8.448</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">34.619</td> <td align="center">19.040</td></tr> <tr><td align="center">Image Classification/ConvNeXT</td> <td align="center">10.410</td> <td align="center">10.208</td></tr> <tr><td align="center">Image Classification/ResNet</td> <td align="center">6.531</td> <td align="center">4.124</td></tr> <tr><td align="center">Image Segmentation/Mask2former</td> <td align="center">60.188</td> <td align="center">49.117</td></tr> <tr><td align="center">Image Segmentation/Maskformer</td> <td align="center">75.764</td> <td align="center">59.487</td></tr> <tr><td align="center">Image Segmentation/MobileNet</td> <td align="center">8.583</td> <td align="center">3.974</td></tr> <tr><td align="center">Object Detection/Resnet-101</td> <td align="center">36.276</td> <td align="center">18.197</td></tr> <tr><td align="center">Object Detection/Conditional-DETR</td> <td align="center">31.219</td> <td align="center">17.993</td></tr></tbody></table> <h3 class="relative group"><a id="a100-batch-size-4" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#a100-batch-size-4"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>A100 (batch size: 4)</span></h3> <table data-svelte-h="svelte-f4zjoc"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>torch 2.0 - <br>no compile</strong></th> <th align="center"><strong>torch 2.0 - <br>compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/ViT</td> <td align="center">14.832</td> <td align="center">14.499</td></tr> <tr><td align="center">Image Segmentation/Segformer</td> <td align="center">18.838</td> <td align="center">16.476</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">13.205</td> <td align="center">13.048</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">48.657</td> <td align="center">32.418</td></tr> <tr><td align="center">Image Classification/ConvNeXT</td> <td align="center">22.940</td> <td align="center">21.631</td></tr> <tr><td align="center">Image Classification/ResNet</td> <td align="center">6.657</td> <td align="center">4.268</td></tr> <tr><td align="center">Image Segmentation/Mask2former</td> <td align="center">74.277</td> <td align="center">61.781</td></tr> <tr><td align="center">Image Segmentation/Maskformer</td> <td align="center">180.700</td> <td align="center">159.116</td></tr> <tr><td align="center">Image Segmentation/MobileNet</td> <td align="center">14.174</td> <td align="center">8.515</td></tr> <tr><td align="center">Object Detection/Resnet-101</td> <td align="center">68.101</td> <td align="center">44.998</td></tr> <tr><td align="center">Object Detection/Conditional-DETR</td> <td align="center">56.470</td> <td align="center">35.552</td></tr></tbody></table> <h3 class="relative group"><a id="a100-batch-size-16" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#a100-batch-size-16"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>A100 (batch size: 16)</span></h3> <table data-svelte-h="svelte-9ju0ii"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>torch 2.0 - <br>no compile</strong></th> <th align="center"><strong>torch 2.0 - <br>compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/ViT</td> <td align="center">40.944</td> <td align="center">40.010</td></tr> <tr><td align="center">Image Segmentation/Segformer</td> <td align="center">37.005</td> <td align="center">31.144</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">41.854</td> <td align="center">41.048</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">164.382</td> <td align="center">161.902</td></tr> <tr><td align="center">Image Classification/ConvNeXT</td> <td align="center">82.258</td> <td align="center">75.561</td></tr> <tr><td align="center">Image Classification/ResNet</td> <td align="center">7.018</td> <td align="center">5.024</td></tr> <tr><td align="center">Image Segmentation/Mask2former</td> <td align="center">178.945</td> <td align="center">154.814</td></tr> <tr><td align="center">Image Segmentation/Maskformer</td> <td align="center">638.570</td> <td align="center">579.826</td></tr> <tr><td align="center">Image Segmentation/MobileNet</td> <td align="center">51.693</td> <td align="center">30.310</td></tr> <tr><td align="center">Object Detection/Resnet-101</td> <td align="center">232.887</td> <td align="center">155.021</td></tr> <tr><td align="center">Object Detection/Conditional-DETR</td> <td align="center">180.491</td> <td align="center">124.032</td></tr></tbody></table> <h3 class="relative group"><a id="v100-batch-size-1" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#v100-batch-size-1"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>V100 (batch size: 1)</span></h3> <table data-svelte-h="svelte-18ncoxq"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>torch 2.0 - <br>no compile</strong></th> <th align="center"><strong>torch 2.0 - <br>compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/ViT</td> <td align="center">10.495</td> <td align="center">6.00</td></tr> <tr><td align="center">Image Segmentation/Segformer</td> <td align="center">13.321</td> <td align="center">5.862</td></tr> <tr><td align="center">Object Detection/OwlViT</td> <td align="center">25.769</td> <td align="center">22.395</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">11.347</td> <td align="center">7.234</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">33.951</td> <td align="center">19.388</td></tr> <tr><td align="center">Image Classification/ConvNeXT</td> <td align="center">11.623</td> <td align="center">10.412</td></tr> <tr><td align="center">Image Classification/ResNet</td> <td align="center">6.484</td> <td align="center">3.820</td></tr> <tr><td align="center">Image Segmentation/Mask2former</td> <td align="center">64.640</td> <td align="center">49.873</td></tr> <tr><td align="center">Image Segmentation/Maskformer</td> <td align="center">95.532</td> <td align="center">72.207</td></tr> <tr><td align="center">Image Segmentation/MobileNet</td> <td align="center">9.217</td> <td align="center">4.753</td></tr> <tr><td align="center">Object Detection/Resnet-101</td> <td align="center">52.818</td> <td align="center">28.367</td></tr> <tr><td align="center">Object Detection/Conditional-DETR</td> <td align="center">39.512</td> <td align="center">20.816</td></tr></tbody></table> <h3 class="relative group"><a id="v100-batch-size-4" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#v100-batch-size-4"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>V100 (batch size: 4)</span></h3> <table data-svelte-h="svelte-15udyd3"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>torch 2.0 - <br>no compile</strong></th> <th align="center"><strong>torch 2.0 - <br>compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/ViT</td> <td align="center">15.181</td> <td align="center">14.501</td></tr> <tr><td align="center">Image Segmentation/Segformer</td> <td align="center">16.787</td> <td align="center">16.188</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">15.171</td> <td align="center">14.753</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">88.529</td> <td align="center">64.195</td></tr> <tr><td align="center">Image Classification/ConvNeXT</td> <td align="center">29.574</td> <td align="center">27.085</td></tr> <tr><td align="center">Image Classification/ResNet</td> <td align="center">6.109</td> <td align="center">4.731</td></tr> <tr><td align="center">Image Segmentation/Mask2former</td> <td align="center">90.402</td> <td align="center">76.926</td></tr> <tr><td align="center">Image Segmentation/Maskformer</td> <td align="center">234.261</td> <td align="center">205.456</td></tr> <tr><td align="center">Image Segmentation/MobileNet</td> <td align="center">24.623</td> <td align="center">14.816</td></tr> <tr><td align="center">Object Detection/Resnet-101</td> <td align="center">134.672</td> <td align="center">101.304</td></tr> <tr><td align="center">Object Detection/Conditional-DETR</td> <td align="center">97.464</td> <td align="center">69.739</td></tr></tbody></table> <h3 class="relative group"><a id="v100-batch-size-16" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#v100-batch-size-16"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>V100 (batch size: 16)</span></h3> <table data-svelte-h="svelte-rw07j7"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>torch 2.0 - <br>no compile</strong></th> <th align="center"><strong>torch 2.0 - <br>compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/ViT</td> <td align="center">52.209</td> <td align="center">51.633</td></tr> <tr><td align="center">Image Segmentation/Segformer</td> <td align="center">61.013</td> <td align="center">55.499</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">53.938</td> <td align="center">53.581</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">OOM</td> <td align="center">OOM</td></tr> <tr><td align="center">Image Classification/ConvNeXT</td> <td align="center">109.682</td> <td align="center">100.771</td></tr> <tr><td align="center">Image Classification/ResNet</td> <td align="center">14.857</td> <td align="center">12.089</td></tr> <tr><td align="center">Image Segmentation/Mask2former</td> <td align="center">249.605</td> <td align="center">222.801</td></tr> <tr><td align="center">Image Segmentation/Maskformer</td> <td align="center">831.142</td> <td align="center">743.645</td></tr> <tr><td align="center">Image Segmentation/MobileNet</td> <td align="center">93.129</td> <td align="center">55.365</td></tr> <tr><td align="center">Object Detection/Resnet-101</td> <td align="center">482.425</td> <td align="center">361.843</td></tr> <tr><td align="center">Object Detection/Conditional-DETR</td> <td align="center">344.661</td> <td align="center">255.298</td></tr></tbody></table> <h3 class="relative group"><a id="t4-batch-size-1" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#t4-batch-size-1"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>T4 (batch size: 1)</span></h3> <table data-svelte-h="svelte-37x5jw"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>torch 2.0 - <br>no compile</strong></th> <th align="center"><strong>torch 2.0 - <br>compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/ViT</td> <td align="center">16.520</td> <td align="center">15.786</td></tr> <tr><td align="center">Image Segmentation/Segformer</td> <td align="center">16.116</td> <td align="center">14.205</td></tr> <tr><td align="center">Object Detection/OwlViT</td> <td align="center">53.634</td> <td align="center">51.105</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">16.464</td> <td align="center">15.710</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">73.100</td> <td align="center">53.99</td></tr> <tr><td align="center">Image Classification/ConvNeXT</td> <td align="center">32.932</td> <td align="center">30.845</td></tr> <tr><td align="center">Image Classification/ResNet</td> <td align="center">6.031</td> <td align="center">4.321</td></tr> <tr><td align="center">Image Segmentation/Mask2former</td> <td align="center">79.192</td> <td align="center">66.815</td></tr> <tr><td align="center">Image Segmentation/Maskformer</td> <td align="center">200.026</td> <td align="center">188.268</td></tr> <tr><td align="center">Image Segmentation/MobileNet</td> <td align="center">18.908</td> <td align="center">11.997</td></tr> <tr><td align="center">Object Detection/Resnet-101</td> <td align="center">106.622</td> <td align="center">82.566</td></tr> <tr><td align="center">Object Detection/Conditional-DETR</td> <td align="center">77.594</td> <td align="center">56.984</td></tr></tbody></table> <h3 class="relative group"><a id="t4-batch-size-4" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#t4-batch-size-4"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>T4 (batch size: 4)</span></h3> <table data-svelte-h="svelte-1mc5027"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>torch 2.0 - <br>no compile</strong></th> <th align="center"><strong>torch 2.0 - <br>compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/ViT</td> <td align="center">43.653</td> <td align="center">43.626</td></tr> <tr><td align="center">Image Segmentation/Segformer</td> <td align="center">45.327</td> <td align="center">42.445</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">52.007</td> <td align="center">51.354</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">277.850</td> <td align="center">268.003</td></tr> <tr><td align="center">Image Classification/ConvNeXT</td> <td align="center">119.259</td> <td align="center">105.580</td></tr> <tr><td align="center">Image Classification/ResNet</td> <td align="center">13.039</td> <td align="center">11.388</td></tr> <tr><td align="center">Image Segmentation/Mask2former</td> <td align="center">201.540</td> <td align="center">184.670</td></tr> <tr><td align="center">Image Segmentation/Maskformer</td> <td align="center">764.052</td> <td align="center">711.280</td></tr> <tr><td align="center">Image Segmentation/MobileNet</td> <td align="center">74.289</td> <td align="center">48.677</td></tr> <tr><td align="center">Object Detection/Resnet-101</td> <td align="center">421.859</td> <td align="center">357.614</td></tr> <tr><td align="center">Object Detection/Conditional-DETR</td> <td align="center">289.002</td> <td align="center">226.945</td></tr></tbody></table> <h3 class="relative group"><a id="t4-batch-size-16" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#t4-batch-size-16"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>T4 (batch size: 16)</span></h3> <table data-svelte-h="svelte-10eiin7"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>torch 2.0 - <br>no compile</strong></th> <th align="center"><strong>torch 2.0 - <br>compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/ViT</td> <td align="center">163.914</td> <td align="center">160.907</td></tr> <tr><td align="center">Image Segmentation/Segformer</td> <td align="center">192.412</td> <td align="center">163.620</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">188.978</td> <td align="center">187.976</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">OOM</td> <td align="center">OOM</td></tr> <tr><td align="center">Image Classification/ConvNeXT</td> <td align="center">422.886</td> <td align="center">388.078</td></tr> <tr><td align="center">Image Classification/ResNet</td> <td align="center">44.114</td> <td align="center">37.604</td></tr> <tr><td align="center">Image Segmentation/Mask2former</td> <td align="center">756.337</td> <td align="center">695.291</td></tr> <tr><td align="center">Image Segmentation/Maskformer</td> <td align="center">2842.940</td> <td align="center">2656.88</td></tr> <tr><td align="center">Image Segmentation/MobileNet</td> <td align="center">299.003</td> <td align="center">201.942</td></tr> <tr><td align="center">Object Detection/Resnet-101</td> <td align="center">1619.505</td> <td align="center">1262.758</td></tr> <tr><td align="center">Object Detection/Conditional-DETR</td> <td align="center">1137.513</td> <td align="center">897.390</td></tr></tbody></table> <h2 class="relative group"><a id="pytorch-nightly" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#pytorch-nightly"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>PyTorch Nightly</span></h2> <p data-svelte-h="svelte-pkjs63">我们还在 PyTorch Nightly 版本(2.1.0dev)上进行了基准测试,可以在<a href="https://download.pytorch.org/whl/nightly/cu118" rel="nofollow">这里</a>找到 Nightly 版本的安装包,并观察到了未编译和编译模型的延迟性能改善。</p> <h3 class="relative group"><a id="a100" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#a100"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>A100</span></h3> <table data-svelte-h="svelte-1cg5nyy"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>Batch Size</strong></th> <th align="center"><strong>torch 2.0 - no compile</strong></th> <th align="center"><strong>torch 2.0 -<br> compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/BeiT</td> <td align="center">Unbatched</td> <td align="center">12.462</td> <td align="center">6.954</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">4</td> <td align="center">14.109</td> <td align="center">12.851</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">16</td> <td align="center">42.179</td> <td align="center">42.147</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">Unbatched</td> <td align="center">30.484</td> <td align="center">15.221</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">4</td> <td align="center">46.816</td> <td align="center">30.942</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">16</td> <td align="center">163.749</td> <td align="center">163.706</td></tr></tbody></table> <h3 class="relative group"><a id="t4" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#t4"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>T4</span></h3> <table data-svelte-h="svelte-1nlzppe"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>Batch Size</strong></th> <th align="center"><strong>torch 2.0 - <br>no compile</strong></th> <th align="center"><strong>torch 2.0 - <br>compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/BeiT</td> <td align="center">Unbatched</td> <td align="center">14.408</td> <td align="center">14.052</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">4</td> <td align="center">47.381</td> <td align="center">46.604</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">16</td> <td align="center">42.179</td> <td align="center">42.147</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">Unbatched</td> <td align="center">68.382</td> <td align="center">53.481</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">4</td> <td align="center">269.615</td> <td align="center">204.785</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">16</td> <td align="center">OOM</td> <td align="center">OOM</td></tr></tbody></table> <h3 class="relative group"><a id="v100" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#v100"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>V100</span></h3> <table data-svelte-h="svelte-ok1p6e"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>Batch Size</strong></th> <th align="center"><strong>torch 2.0 - <br>no compile</strong></th> <th align="center"><strong>torch 2.0 - <br>compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/BeiT</td> <td align="center">Unbatched</td> <td align="center">13.477</td> <td align="center">7.926</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">4</td> <td align="center">15.103</td> <td align="center">14.378</td></tr> <tr><td align="center">Image Classification/BeiT</td> <td align="center">16</td> <td align="center">52.517</td> <td align="center">51.691</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">Unbatched</td> <td align="center">28.706</td> <td align="center">19.077</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">4</td> <td align="center">88.402</td> <td align="center">62.949</td></tr> <tr><td align="center">Object Detection/DETR</td> <td align="center">16</td> <td align="center">OOM</td> <td align="center">OOM</td></tr></tbody></table> <h2 class="relative group"><a id="降低开销" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#降低开销"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>降低开销</span></h2> <p data-svelte-h="svelte-vw78ma">我们在 PyTorch Nightly 版本中为 A100 和 T4 进行了 <code>reduce-overhead</code> 编译模式的性能基准测试。</p> <h3 class="relative group"><a id="a100" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#a100"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>A100</span></h3> <table data-svelte-h="svelte-13rnx0"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>Batch Size</strong></th> <th align="center"><strong>torch 2.0 - <br>no compile</strong></th> <th align="center"><strong>torch 2.0 - <br>compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/ConvNeXT</td> <td align="center">Unbatched</td> <td align="center">11.758</td> <td align="center">7.335</td></tr> <tr><td align="center">Image Classification/ConvNeXT</td> <td align="center">4</td> <td align="center">23.171</td> <td align="center">21.490</td></tr> <tr><td align="center">Image Classification/ResNet</td> <td align="center">Unbatched</td> <td align="center">7.435</td> <td align="center">3.801</td></tr> <tr><td align="center">Image Classification/ResNet</td> <td align="center">4</td> <td align="center">7.261</td> <td align="center">2.187</td></tr> <tr><td align="center">Object Detection/Conditional-DETR</td> <td align="center">Unbatched</td> <td align="center">32.823</td> <td align="center">11.627</td></tr> <tr><td align="center">Object Detection/Conditional-DETR</td> <td align="center">4</td> <td align="center">50.622</td> <td align="center">33.831</td></tr> <tr><td align="center">Image Segmentation/MobileNet</td> <td align="center">Unbatched</td> <td align="center">9.869</td> <td align="center">4.244</td></tr> <tr><td align="center">Image Segmentation/MobileNet</td> <td align="center">4</td> <td align="center">14.385</td> <td align="center">7.946</td></tr></tbody></table> <h3 class="relative group"><a id="t4" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#t4"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>T4</span></h3> <table data-svelte-h="svelte-oh2zql"><thead><tr><th align="center"><strong>Task/Model</strong></th> <th align="center"><strong>Batch Size</strong></th> <th align="center"><strong>torch 2.0 - <br>no compile</strong></th> <th align="center"><strong>torch 2.0 - <br>compile</strong></th></tr></thead> <tbody><tr><td align="center">Image Classification/ConvNeXT</td> <td align="center">Unbatched</td> <td align="center">32.137</td> <td align="center">31.84</td></tr> <tr><td align="center">Image Classification/ConvNeXT</td> <td align="center">4</td> <td align="center">120.944</td> <td align="center">110.209</td></tr> <tr><td align="center">Image Classification/ResNet</td> <td align="center">Unbatched</td> <td align="center">9.761</td> <td align="center">7.698</td></tr> <tr><td align="center">Image Classification/ResNet</td> <td align="center">4</td> <td align="center">15.215</td> <td align="center">13.871</td></tr> <tr><td align="center">Object Detection/Conditional-DETR</td> <td align="center">Unbatched</td> <td align="center">72.150</td> <td align="center">57.660</td></tr> <tr><td align="center">Object Detection/Conditional-DETR</td> <td align="center">4</td> <td align="center">301.494</td> <td align="center">247.543</td></tr> <tr><td align="center">Image Segmentation/MobileNet</td> <td align="center">Unbatched</td> <td align="center">22.266</td> <td align="center">19.339</td></tr> <tr><td align="center">Image Segmentation/MobileNet</td> <td align="center">4</td> <td align="center">78.311</td> <td align="center">50.983</td></tr></tbody></table> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/zh/perf_torch_compile.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_173uja2 = {
assets: "/docs/transformers/main/zh",
base: "/docs/transformers/main/zh",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/transformers/main/zh/_app/immutable/entry/start.a61b9c50.js"),
import("/docs/transformers/main/zh/_app/immutable/entry/app.99775688.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 49],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
67.7 kB
·
Xet hash:
eda96cb84ffefdcf47e27abc03203e17e9f2238e3d34f54c5d8c597da918b353

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.