Buckets:

rtrm's picture
download
raw
12.8 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Supported models and hardware&quot;,&quot;local&quot;:&quot;supported-models-and-hardware&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Supported embeddings models&quot;,&quot;local&quot;:&quot;supported-embeddings-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Supported re-rankers and sequence classification models&quot;,&quot;local&quot;:&quot;supported-re-rankers-and-sequence-classification-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Supported hardware&quot;,&quot;local&quot;:&quot;supported-hardware&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/text-embeddings-inference/main/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<link rel="modulepreload" href="/docs/text-embeddings-inference/main/en/_app/immutable/entry/start.5929ce5e.js">
<link rel="modulepreload" href="/docs/text-embeddings-inference/main/en/_app/immutable/chunks/scheduler.b108d059.js">
<link rel="modulepreload" href="/docs/text-embeddings-inference/main/en/_app/immutable/chunks/singletons.8ba50d2e.js">
<link rel="modulepreload" href="/docs/text-embeddings-inference/main/en/_app/immutable/chunks/paths.0433c982.js">
<link rel="modulepreload" href="/docs/text-embeddings-inference/main/en/_app/immutable/entry/app.99d3b526.js">
<link rel="modulepreload" href="/docs/text-embeddings-inference/main/en/_app/immutable/chunks/index.008de539.js">
<link rel="modulepreload" href="/docs/text-embeddings-inference/main/en/_app/immutable/nodes/0.edd78360.js">
<link rel="modulepreload" href="/docs/text-embeddings-inference/main/en/_app/immutable/nodes/12.efbbb03f.js">
<link rel="modulepreload" href="/docs/text-embeddings-inference/main/en/_app/immutable/chunks/EditOnGithub.d1c48e3d.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Supported models and hardware&quot;,&quot;local&quot;:&quot;supported-models-and-hardware&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Supported embeddings models&quot;,&quot;local&quot;:&quot;supported-embeddings-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Supported re-rankers and sequence classification models&quot;,&quot;local&quot;:&quot;supported-re-rankers-and-sequence-classification-models&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Supported hardware&quot;,&quot;local&quot;:&quot;supported-hardware&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="supported-models-and-hardware" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#supported-models-and-hardware"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Supported models and 
hardware</span></h1> <p data-svelte-h="svelte-1k6m0ht">We are continually expanding our support for other model types and plan to include them in future updates.</p> <h2 class="relative group"><a id="supported-embeddings-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#supported-embeddings-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Supported embeddings models</span></h2> <p data-svelte-h="svelte-wffkmp">Text Embeddings Inference currently supports BERT, CamemBERT, XLM-RoBERTa models with absolute positions and JinaBERT
model with Alibi positions.</p> <p data-svelte-h="svelte-vskdni">Below are some examples of the currently supported models:</p> <table data-svelte-h="svelte-aq53hm"><thead><tr><th>MTEB Rank</th> <th>Model Type</th> <th>Model ID</th></tr></thead> <tbody><tr><td>6</td> <td>Bert</td> <td><a href="https://hf.co/WhereIsAI/UAE-Large-V1" rel="nofollow">WhereIsAI/UAE-Large-V1</a></td></tr> <tr><td>10</td> <td>XLM-RoBERTa</td> <td><a href="https://hf.co/intfloat/multilingual-e5-large-instruct" rel="nofollow">intfloat/multilingual-e5-large-instruct</a></td></tr> <tr><td>N/A</td> <td>NomicBert</td> <td><a href="https://hf.co/nomic-ai/nomic-embed-text-v1" rel="nofollow">nomic-ai/nomic-embed-text-v1</a></td></tr> <tr><td>N/A</td> <td>NomicBert</td> <td><a href="https://hf.co/nomic-ai/nomic-embed-text-v1.5" rel="nofollow">nomic-ai/nomic-embed-text-v1.5</a></td></tr> <tr><td>N/A</td> <td>JinaBERT</td> <td><a href="https://hf.co/jinaai/jina-embeddings-v2-base-en" rel="nofollow">jinaai/jina-embeddings-v2-base-en</a></td></tr> <tr><td>N/A</td> <td>JinaBERT</td> <td><a href="https://hf.co/jinaai/jina-embeddings-v2-base-code" rel="nofollow">jinaai/jina-embeddings-v2-base-code</a></td></tr></tbody></table> <p data-svelte-h="svelte-1fz6qfa">To explore the list of best performing text embeddings models, visit the
<a href="https://huggingface.co/spaces/mteb/leaderboard" rel="nofollow">Massive Text Embedding Benchmark (MTEB) Leaderboard</a>.</p> <h2 class="relative group"><a id="supported-re-rankers-and-sequence-classification-models" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#supported-re-rankers-and-sequence-classification-models"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Supported re-rankers and sequence classification models</span></h2> <p data-svelte-h="svelte-17enjvk">Text Embeddings Inference currently supports CamemBERT, and XLM-RoBERTa Sequence Classification models with absolute positions.</p> <p data-svelte-h="svelte-vskdni">Below are some examples of the currently supported models:</p> <table data-svelte-h="svelte-ze9dp3"><thead><tr><th>Task</th> <th>Model Type</th> <th>Model ID</th> <th>Revision</th></tr></thead> <tbody><tr><td>Re-Ranking</td> <td>XLM-RoBERTa</td> <td><a href="https://huggingface.co/BAAI/bge-reranker-large" rel="nofollow">BAAI/bge-reranker-large</a></td> <td><code>refs/pr/4</code></td></tr> <tr><td>Re-Ranking</td> <td>XLM-RoBERTa</td> <td><a href="https://huggingface.co/BAAI/bge-reranker-base" 
rel="nofollow">BAAI/bge-reranker-base</a></td> <td><code>refs/pr/5</code></td></tr> <tr><td>Sentiment Analysis</td> <td>RoBERTa</td> <td><a href="https://huggingface.co/SamLowe/roberta-base-go_emotions" rel="nofollow">SamLowe/roberta-base-go_emotions</a></td> <td></td></tr></tbody></table> <h2 class="relative group"><a id="supported-hardware" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#supported-hardware"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Supported hardware</span></h2> <p data-svelte-h="svelte-58o6ad">Text Embeddings Inference can be used on CPU, Turing (T4, RTX 2000 series, …), Ampere 80 (A100, A30),
Ampere 86 (A10, A40, …), Ada Lovelace (RTX 4000 series, …), and Hopper (H100) architectures.</p> <p data-svelte-h="svelte-1q49upw">The library does <strong>not</strong> support CUDA compute capabilities &lt; 7.5, which means V100, Titan V, GTX 1000 series, etc. are not supported.
To leverage your GPUs, make sure to install the
<a href="https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html" rel="nofollow">NVIDIA Container Toolkit</a>, and use
NVIDIA drivers with CUDA version 12.2 or higher.</p> <p data-svelte-h="svelte-16dfo8x">Find the appropriate Docker image for your hardware in the following table:</p> <table data-svelte-h="svelte-f5zzzv"><thead><tr><th>Architecture</th> <th>Image</th></tr></thead> <tbody><tr><td>CPU</td> <td>ghcr.io/huggingface/text-embeddings-inference:cpu-1.2</td></tr> <tr><td>Volta</td> <td>NOT SUPPORTED</td></tr> <tr><td>Turing (T4, RTX 2000 series, …)</td> <td>ghcr.io/huggingface/text-embeddings-inference:turing-1.2 (experimental)</td></tr> <tr><td>Ampere 80 (A100, A30)</td> <td>ghcr.io/huggingface/text-embeddings-inference:1.2</td></tr> <tr><td>Ampere 86 (A10, A40, …)</td> <td>ghcr.io/huggingface/text-embeddings-inference:86-1.2</td></tr> <tr><td>Ada Lovelace (RTX 4000 series, …)</td> <td>ghcr.io/huggingface/text-embeddings-inference:89-1.2</td></tr> <tr><td>Hopper (H100)</td> <td>ghcr.io/huggingface/text-embeddings-inference:hopper-1.2 (experimental)</td></tr></tbody></table> <p data-svelte-h="svelte-173bv05"><strong>Warning</strong>: Flash Attention is turned off by default for the Turing image as it suffers from precision issues.
You can turn Flash Attention v1 ON by using the <code>USE_FLASH_ATTENTION=True</code> environment variable.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/text-embeddings-inference/blob/main/docs/source/en/supported_models.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
	// Publish the path configuration on a global before the entry modules load.
	// NOTE(review): presumably read by the imported start module — confirm.
	__sveltekit_1a9ukzg = {
		assets: "/docs/text-embeddings-inference/main/en",
		base: "/docs/text-embeddings-inference/main/en",
		env: {}
	};

	// document.currentScript is only set while this script is executing, so
	// capture the parent element now, before any asynchronous work begins.
	const mountTarget = document.currentScript.parentElement;

	// One entry per id in node_ids below; both are null here.
	const nodeData = [null, null];

	// Kick off both dynamic imports; they are awaited together below.
	const startModule = import("/docs/text-embeddings-inference/main/en/_app/immutable/entry/start.5929ce5e.js");
	const appModule = import("/docs/text-embeddings-inference/main/en/_app/immutable/entry/app.99d3b526.js");

	Promise.all([startModule, appModule]).then((loaded) => {
		const kit = loaded[0];
		const app = loaded[1];
		// Hand the app module, the mount element and the page options to start().
		kit.start(app, mountTarget, {
			node_ids: [0, 12],
			data: nodeData,
			form: null,
			error: null
		});
	});
}
</script>

Xet Storage Details

Size:
12.8 kB
·
Xet hash:
9c88f60b12a81ff7a816038396920f819fe77fb07612a7cf0e5c11bb4660770e

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.