Buckets:

HuggingFaceDocBuilder's picture
download
raw
23.6 kB
import{s as Cs,n as vs,o as Ws}from"../chunks/scheduler.6efaaf90.js";import{S as Ns,i as Gs,e as p,s as t,c as i,h as Rs,a as o,d as e,b as n,f as js,g as c,j as M,k as ys,l as us,m as a,n as r,t as d,o as y,p as m}from"../chunks/index.eb3e1f0f.js";import{C as qs,H as ms,E as Xs}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.16f13047.js";import{C as z}from"../chunks/CodeBlock.72c8dd07.js";function _s(Ts){let J,Y,x,V,h,$,w,A,U,hs="Before Transformers.js v3, we used the <code>quantized</code> option to specify whether to use a quantized (q8) or full-precision (fp32) variant of the model by setting <code>quantized</code> to <code>true</code> or <code>false</code>, respectively. Now, we’ve added the ability to select from a much larger list with the <code>dtype</code> parameter.",H,b,ws="The list of available quantizations depends on the model, but some common ones are: full-precision (<code>&quot;fp32&quot;</code>), half-precision (<code>&quot;fp16&quot;</code>), 8-bit (<code>&quot;q8&quot;</code>, <code>&quot;int8&quot;</code>, <code>&quot;uint8&quot;</code>), and 4-bit (<code>&quot;q4&quot;</code>, <code>&quot;bnb4&quot;</code>, <code>&quot;q4f16&quot;</code>).",S,j,Us='<picture><source media="(prefers-color-scheme: dark)" srcset="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/transformersjs-v3/dtypes-dark.jpg" style="max-width: 100%;"/> <source media="(prefers-color-scheme: light)" srcset="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/transformersjs-v3/dtypes-light.jpg" style="max-width: 100%;"/> <img alt="Available dtypes for mixedbread-ai/mxbai-embed-xsmall-v1" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/transformersjs-v3/dtypes-dark.jpg" style="max-width: 100%;"/></picture> <a href="https://huggingface.co/mixedbread-ai/mxbai-embed-xsmall-v1/tree/main/onnx">(e.g., mixedbread-ai/mxbai-embed-xsmall-v1)</a>',F,f,L,g,bs='<strong>Example:</strong> Run Qwen2.5-0.5B-Instruct in 4-bit quantization (<a href="https://v2.scrimba.com/s0dlcpv0ci" rel="nofollow">demo</a>)',D,I,P,Z,O,B,fs="Not sure which quantizations a model offers? Use <code>ModelRegistry.get_available_dtypes()</code> to probe the repository and find out:",K,k,ss,Q,gs="This checks which ONNX files exist on the Hugging Face Hub for each dtype. For multi-session models (e.g., encoder-decoder), a dtype is only listed if <strong>all</strong> required session files are present.",ls,C,Is="You can use this to build UIs that let users pick a quantization level, or to automatically select the smallest available dtype:",es,v,as,W,ts,N,Zs="Some encoder-decoder models, like Whisper or Florence-2, are extremely sensitive to quantization settings: especially of the encoder. For this reason, we added the ability to select per-module dtypes, which can be done by providing a mapping from module name to dtype.",ns,G,Bs='<strong>Example:</strong> Run Florence-2 on WebGPU (<a href="https://v2.scrimba.com/s0pdm485fo" rel="nofollow">demo</a>)',ps,R,os,u,ks='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/transformersjs-v3/florence-2-webgpu.gif" alt="Florence-2 running on WebGPU"/>',is,T,_,Qs="See full code example",Js,q,cs,X,Ms,E,rs;return h=new qs({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),w=new ms({props:{title:"Using quantized models (dtypes)",local:"using-quantized-models-dtypes",headingTag:"h1"}}),f=new ms({props:{title:"Basic usage",local:"basic-usage",headingTag:"h2"}}),I=new z({props:{code:"aW1wb3J0JTIwJTdCJTIwcGlwZWxpbmUlMjAlN0QlMjBmcm9tJTIwJTIyJTQwaHVnZ2luZ2ZhY2UlMkZ0cmFuc2Zvcm1lcnMlMjIlM0IlMEElMEElMkYlMkYlMjBDcmVhdGUlMjBhJTIwdGV4dCUyMGdlbmVyYXRpb24lMjBwaXBlbGluZSUwQWNvbnN0JTIwZ2VuZXJhdG9yJTIwJTNEJTIwYXdhaXQlMjBwaXBlbGluZSglMEElMjAlMjAlMjJ0ZXh0LWdlbmVyYXRpb24lMjIlMkMlMEElMjAlMjAlMjJvbm54LWNvbW11bml0eSUyRlF3ZW4yLjUtMC41Qi1JbnN0cnVjdCUyMiUyQyUwQSUyMCUyMCU3QiUyMGR0eXBlJTNBJTIwJTIycTQlMjIlMkMlMjBkZXZpY2UlM0ElMjAlMjJ3ZWJncHUlMjIlMjAlN0QlMkMlMEEpJTNCJTBBJTBBJTJGJTJGJTIwRGVmaW5lJTIwdGhlJTIwbGlzdCUyMG9mJTIwbWVzc2FnZXMlMEFjb25zdCUyMG1lc3NhZ2VzJTIwJTNEJTIwJTVCJTBBJTIwJTIwJTdCJTIwcm9sZSUzQSUyMCUyMnN5c3RlbSUyMiUyQyUyMGNvbnRlbnQlM0ElMjAlMjJZb3UlMjBhcmUlMjBhJTIwaGVscGZ1bCUyMGFzc2lzdGFudC4lMjIlMjAlN0QlMkMlMEElMjAlMjAlN0IlMjByb2xlJTNBJTIwJTIydXNlciUyMiUyQyUyMGNvbnRlbnQlM0ElMjAlMjJUZWxsJTIwbWUlMjBhJTIwZnVubnklMjBqb2tlLiUyMiUyMCU3RCUyQyUwQSU1RCUzQiUwQSUwQSUyRiUyRiUyMEdlbmVyYXRlJTIwYSUyMHJlc3BvbnNlJTBBY29uc3QlMjBvdXRwdXQlMjAlM0QlMjBhd2FpdCUyMGdlbmVyYXRvcihtZXNzYWdlcyUyQyUyMCU3QiUyMG1heF9uZXdfdG9rZW5zJTNBJTIwMTI4JTIwJTdEKSUzQiUwQWNvbnNvbGUubG9nKG91dHB1dCU1QjAlNUQuZ2VuZXJhdGVkX3RleHQuYXQoLTEpLmNvbnRlbnQpJTNC",highlighted:`<span class="hljs-keyword">import</span> { pipeline } <span class="hljs-keyword">from</span> <span class="hljs-string">&quot;@huggingface/transformers&quot;</span>;
<span class="hljs-comment">// Create a text generation pipeline</span>
<span class="hljs-keyword">const</span> generator = <span class="hljs-keyword">await</span> <span class="hljs-title function_">pipeline</span>(
<span class="hljs-string">&quot;text-generation&quot;</span>,
<span class="hljs-string">&quot;onnx-community/Qwen2.5-0.5B-Instruct&quot;</span>,
{ <span class="hljs-attr">dtype</span>: <span class="hljs-string">&quot;q4&quot;</span>, <span class="hljs-attr">device</span>: <span class="hljs-string">&quot;webgpu&quot;</span> },
);
<span class="hljs-comment">// Define the list of messages</span>
<span class="hljs-keyword">const</span> messages = [
{ <span class="hljs-attr">role</span>: <span class="hljs-string">&quot;system&quot;</span>, <span class="hljs-attr">content</span>: <span class="hljs-string">&quot;You are a helpful assistant.&quot;</span> },
{ <span class="hljs-attr">role</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-attr">content</span>: <span class="hljs-string">&quot;Tell me a funny joke.&quot;</span> },
];
<span class="hljs-comment">// Generate a response</span>
<span class="hljs-keyword">const</span> output = <span class="hljs-keyword">await</span> <span class="hljs-title function_">generator</span>(messages, { <span class="hljs-attr">max_new_tokens</span>: <span class="hljs-number">128</span> });
<span class="hljs-variable language_">console</span>.<span class="hljs-title function_">log</span>(output[<span class="hljs-number">0</span>].<span class="hljs-property">generated_text</span>.<span class="hljs-title function_">at</span>(-<span class="hljs-number">1</span>).<span class="hljs-property">content</span>);`,wrap:!1}}),Z=new ms({props:{title:"Detecting available dtypes",local:"detecting-available-dtypes",headingTag:"h2"}}),k=new z({props:{code:"aW1wb3J0JTIwJTdCJTIwTW9kZWxSZWdpc3RyeSUyMCU3RCUyMGZyb20lMjAlMjIlNDBodWdnaW5nZmFjZSUyRnRyYW5zZm9ybWVycyUyMiUzQiUwQSUwQWNvbnN0JTIwZHR5cGVzJTIwJTNEJTIwYXdhaXQlMjBNb2RlbFJlZ2lzdHJ5LmdldF9hdmFpbGFibGVfZHR5cGVzKCUyMm9ubngtY29tbXVuaXR5JTJGYWxsLU1pbmlMTS1MNi12Mi1PTk5YJTIyKSUzQiUwQWNvbnNvbGUubG9nKGR0eXBlcyklM0IlMjAlMkYlMkYlMjBlLmcuJTJDJTIwJTVCJTIwJ2ZwMzInJTJDJTIwJ2ZwMTYnJTJDJTIwJ2ludDgnJTJDJTIwJ3VpbnQ4JyUyQyUyMCdxOCclMkMlMjAncTQnJTIwJTVE",highlighted:`<span class="hljs-keyword">import</span> { <span class="hljs-title class_">ModelRegistry</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">&quot;@huggingface/transformers&quot;</span>;
<span class="hljs-keyword">const</span> dtypes = <span class="hljs-keyword">await</span> <span class="hljs-title class_">ModelRegistry</span>.<span class="hljs-title function_">get_available_dtypes</span>(<span class="hljs-string">&quot;onnx-community/all-MiniLM-L6-v2-ONNX&quot;</span>);
<span class="hljs-variable language_">console</span>.<span class="hljs-title function_">log</span>(dtypes); <span class="hljs-comment">// e.g., [ &#x27;fp32&#x27;, &#x27;fp16&#x27;, &#x27;int8&#x27;, &#x27;uint8&#x27;, &#x27;q8&#x27;, &#x27;q4&#x27; ]</span>`,wrap:!1}}),v=new z({props:{code:"Y29uc3QlMjBkdHlwZXMlMjAlM0QlMjBhd2FpdCUyME1vZGVsUmVnaXN0cnkuZ2V0X2F2YWlsYWJsZV9kdHlwZXMoJTIyb25ueC1jb21tdW5pdHklMkZRd2VuMy0wLjZCLU9OTlglMjIpJTNCJTBBJTBBJTJGJTJGJTIwUGljayUyMHRoZSUyMHNtYWxsZXN0JTIwYXZhaWxhYmxlJTIwcXVhbnRpemF0aW9uJTJDJTIwZmFsbGluZyUyMGJhY2slMjB0byUyMGZwMzIlMEFjb25zdCUyMHByZWZlcnJlZCUyMCUzRCUyMCU1QiUyMnE0JTIyJTJDJTIwJTIycTglMjIlMkMlMjAlMjJmcDE2JTIyJTJDJTIwJTIyZnAzMiUyMiU1RCUzQiUwQWNvbnN0JTIwZHR5cGUlMjAlM0QlMjBwcmVmZXJyZWQuZmluZCgoZCklMjAlM0QlM0UlMjBkdHlwZXMuaW5jbHVkZXMoZCkpJTIwJTNGJTNGJTIwJTIyZnAzMiUyMiUzQiUwQSUwQWNvbnN0JTIwZ2VuZXJhdG9yJTIwJTNEJTIwYXdhaXQlMjBwaXBlbGluZSglMjJ0ZXh0LWdlbmVyYXRpb24lMjIlMkMlMjAlMjJvbm54LWNvbW11bml0eSUyRlF3ZW4zLTAuNkItT05OWCUyMiUyQyUyMCU3QiUyMGR0eXBlJTIwJTdEKSUzQg==",highlighted:`<span class="hljs-keyword">const</span> dtypes = <span class="hljs-keyword">await</span> <span class="hljs-title class_">ModelRegistry</span>.<span class="hljs-title function_">get_available_dtypes</span>(<span class="hljs-string">&quot;onnx-community/Qwen3-0.6B-ONNX&quot;</span>);
<span class="hljs-comment">// Pick the smallest available quantization, falling back to fp32</span>
<span class="hljs-keyword">const</span> preferred = [<span class="hljs-string">&quot;q4&quot;</span>, <span class="hljs-string">&quot;q8&quot;</span>, <span class="hljs-string">&quot;fp16&quot;</span>, <span class="hljs-string">&quot;fp32&quot;</span>];
<span class="hljs-keyword">const</span> dtype = preferred.<span class="hljs-title function_">find</span>(<span class="hljs-function">(<span class="hljs-params">d</span>) =&gt;</span> dtypes.<span class="hljs-title function_">includes</span>(d)) ?? <span class="hljs-string">&quot;fp32&quot;</span>;
<span class="hljs-keyword">const</span> generator = <span class="hljs-keyword">await</span> <span class="hljs-title function_">pipeline</span>(<span class="hljs-string">&quot;text-generation&quot;</span>, <span class="hljs-string">&quot;onnx-community/Qwen3-0.6B-ONNX&quot;</span>, { dtype });`,wrap:!1}}),W=new ms({props:{title:"Per-module dtypes",local:"per-module-dtypes",headingTag:"h2"}}),R=new z({props:{code:"aW1wb3J0JTIwJTdCJTIwRmxvcmVuY2UyRm9yQ29uZGl0aW9uYWxHZW5lcmF0aW9uJTIwJTdEJTIwZnJvbSUyMCUyMiU0MGh1Z2dpbmdmYWNlJTJGdHJhbnNmb3JtZXJzJTIyJTNCJTBBJTBBY29uc3QlMjBtb2RlbCUyMCUzRCUyMGF3YWl0JTIwRmxvcmVuY2UyRm9yQ29uZGl0aW9uYWxHZW5lcmF0aW9uLmZyb21fcHJldHJhaW5lZCglMEElMjAlMjAlMjJvbm54LWNvbW11bml0eSUyRkZsb3JlbmNlLTItYmFzZS1mdCUyMiUyQyUwQSUyMCUyMCU3QiUwQSUyMCUyMCUyMCUyMGR0eXBlJTNBJTIwJTdCJTBBJTIwJTIwJTIwJTIwJTIwJTIwZW1iZWRfdG9rZW5zJTNBJTIwJTIyZnAxNiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMHZpc2lvbl9lbmNvZGVyJTNBJTIwJTIyZnAxNiUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMGVuY29kZXJfbW9kZWwlM0ElMjAlMjJxNCUyMiUyQyUwQSUyMCUyMCUyMCUyMCUyMCUyMGRlY29kZXJfbW9kZWxfbWVyZ2VkJTNBJTIwJTIycTQlMjIlMkMlMEElMjAlMjAlMjAlMjAlN0QlMkMlMEElMjAlMjAlMjAlMjBkZXZpY2UlM0ElMjAlMjJ3ZWJncHUlMjIlMkMlMEElMjAlMjAlN0QlMkMlMEEpJTNC",highlighted:`<span class="hljs-keyword">import</span> { <span class="hljs-title class_">Florence2ForConditionalGeneration</span> } <span class="hljs-keyword">from</span> <span class="hljs-string">&quot;@huggingface/transformers&quot;</span>;
<span class="hljs-keyword">const</span> model = <span class="hljs-keyword">await</span> <span class="hljs-title class_">Florence2ForConditionalGeneration</span>.<span class="hljs-title function_">from_pretrained</span>(
<span class="hljs-string">&quot;onnx-community/Florence-2-base-ft&quot;</span>,
{
<span class="hljs-attr">dtype</span>: {
<span class="hljs-attr">embed_tokens</span>: <span class="hljs-string">&quot;fp16&quot;</span>,
<span class="hljs-attr">vision_encoder</span>: <span class="hljs-string">&quot;fp16&quot;</span>,
<span class="hljs-attr">encoder_model</span>: <span class="hljs-string">&quot;q4&quot;</span>,
<span class="hljs-attr">decoder_model_merged</span>: <span class="hljs-string">&quot;q4&quot;</span>,
},
<span class="hljs-attr">device</span>: <span class="hljs-string">&quot;webgpu&quot;</span>,
},
);`,wrap:!1}}),q=new z({props:{code:"aW1wb3J0JTIwJTdCJTBBJTIwJTIwRmxvcmVuY2UyRm9yQ29uZGl0aW9uYWxHZW5lcmF0aW9uJTJDJTBBJTIwJTIwQXV0b1Byb2Nlc3NvciUyQyUwQSUyMCUyMEF1dG9Ub2tlbml6ZXIlMkMlMEElMjAlMjBSYXdJbWFnZSUyQyUwQSU3RCUyMGZyb20lMjAlMjIlNDBodWdnaW5nZmFjZSUyRnRyYW5zZm9ybWVycyUyMiUzQiUwQSUwQSUyRiUyRiUyMExvYWQlMjBtb2RlbCUyQyUyMHByb2Nlc3NvciUyQyUyMGFuZCUyMHRva2VuaXplciUwQWNvbnN0JTIwbW9kZWxfaWQlMjAlM0QlMjAlMjJvbm54LWNvbW11bml0eSUyRkZsb3JlbmNlLTItYmFzZS1mdCUyMiUzQiUwQWNvbnN0JTIwbW9kZWwlMjAlM0QlMjBhd2FpdCUyMEZsb3JlbmNlMkZvckNvbmRpdGlvbmFsR2VuZXJhdGlvbi5mcm9tX3ByZXRyYWluZWQoJTBBJTIwJTIwbW9kZWxfaWQlMkMlMEElMjAlMjAlN0IlMEElMjAlMjAlMjAlMjBkdHlwZSUzQSUyMCU3QiUwQSUyMCUyMCUyMCUyMCUyMCUyMGVtYmVkX3Rva2VucyUzQSUyMCUyMmZwMTYlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjB2aXNpb25fZW5jb2RlciUzQSUyMCUyMmZwMTYlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjBlbmNvZGVyX21vZGVsJTNBJTIwJTIycTQlMjIlMkMlMEElMjAlMjAlMjAlMjAlMjAlMjBkZWNvZGVyX21vZGVsX21lcmdlZCUzQSUyMCUyMnE0JTIyJTJDJTBBJTIwJTIwJTIwJTIwJTdEJTJDJTBBJTIwJTIwJTIwJTIwZGV2aWNlJTNBJTIwJTIyd2ViZ3B1JTIyJTJDJTBBJTIwJTIwJTdEJTJDJTBBKSUzQiUwQWNvbnN0JTIwcHJvY2Vzc29yJTIwJTNEJTIwYXdhaXQlMjBBdXRvUHJvY2Vzc29yLmZyb21fcHJldHJhaW5lZChtb2RlbF9pZCklM0IlMEFjb25zdCUyMHRva2VuaXplciUyMCUzRCUyMGF3YWl0JTIwQXV0b1Rva2VuaXplci5mcm9tX3ByZXRyYWluZWQobW9kZWxfaWQpJTNCJTBBJTBBJTJGJTJGJTIwTG9hZCUyMGltYWdlJTIwYW5kJTIwcHJlcGFyZSUyMHZpc2lvbiUyMGlucHV0cyUwQWNvbnN0JTIwdXJsJTIwJTNEJTBBJTIwJTIwJTIyaHR0cHMlM0ElMkYlMkZodWdnaW5nZmFjZS5jbyUyRmRhdGFzZXRzJTJGaHVnZ2luZ2ZhY2UlMkZkb2N1bWVudGF0aW9uLWltYWdlcyUyRnJlc29sdmUlMkZtYWluJTJGdHJhbnNmb3JtZXJzJTJGdGFza3MlMkZjYXIuanBnJTIyJTNCJTBBY29uc3QlMjBpbWFnZSUyMCUzRCUyMGF3YWl0JTIwUmF3SW1hZ2UuZnJvbVVSTCh1cmwpJTNCJTBBY29uc3QlMjB2aXNpb25faW5wdXRzJTIwJTNEJTIwYXdhaXQlMjBwcm9jZXNzb3IoaW1hZ2UpJTNCJTBBJTBBJTJGJTJGJTIwU3BlY2lmeSUyMHRhc2slMjBhbmQlMjBwcmVwYXJlJTIwdGV4dCUyMGlucHV0cyUwQWNvbnN0JTIwdGFzayUyMCUzRCUyMCUyMiUzQ01PUkVfREVUQUlMRURfQ0FQVElPTiUzRSUyMiUzQiUwQWNvbnN0JTIwcHJvbXB0cyUyMCUzRCUyMHByb2Nlc3Nvci5jb25zdHJ1Y3RfcHJvbXB0cyh0YXNrKSUzQiUwQWNvbnN0JTIwdGV4dF9pbnB1dHMlMjAlM0QlMjB0b2tlbml6ZXIocHJvbXB0cyklM0IlMEElMEElMkYlMkYlMjBHZW5lcmF0ZSUyMHRleHQlMEFjb25zdCUyMGdlbmVyYXRlZF9pZHMlMjAlM0QlMjBhd2FpdCUyMG1vZGVsLmdlbmVyYXRlKCU3QiUwQSUyMCUyMC4uLnRleHRfaW5wdXRzJTJDJTBBJTIwJTIwLi4udmlzaW9uX2lucHV0cyUyQyUwQSUyMCUyMG1heF9uZXdfdG9rZW5zJTNBJTIwMTAwJTJDJTBBJTdEKSUzQiUwQSUwQSUyRiUyRiUyMERlY29kZSUyMGdlbmVyYXRlZCUyMHRleHQlMEFjb25zdCUyMGdlbmVyYXRlZF90ZXh0JTIwJTNEJTIwdG9rZW5pemVyLmJhdGNoX2RlY29kZShnZW5lcmF0ZWRfaWRzJTJDJTIwJTdCJTBBJTIwJTIwc2tpcF9zcGVjaWFsX3Rva2VucyUzQSUyMGZhbHNlJTJDJTBBJTdEKSU1QjAlNUQlM0IlMEElMEElMkYlMkYlMjBQb3N0LXByb2Nlc3MlMjB0aGUlMjBnZW5lcmF0ZWQlMjB0ZXh0JTBBY29uc3QlMjByZXN1bHQlMjAlM0QlMjBwcm9jZXNzb3IucG9zdF9wcm9jZXNzX2dlbmVyYXRpb24oJTBBJTIwJTIwZ2VuZXJhdGVkX3RleHQlMkMlMEElMjAlMjB0YXNrJTJDJTBBJTIwJTIwaW1hZ2Uuc2l6ZSUyQyUwQSklM0IlMEFjb25zb2xlLmxvZyhyZXN1bHQpJTNCJTBBJTJGJTJGJTIwJTdCJTIwJyUzQ01PUkVfREVUQUlMRURfQ0FQVElPTiUzRSclM0ElMjAnQSUyMGdyZWVuJTIwY2FyJTIwaXMlMjBwYXJrZWQlMjBpbiUyMGZyb250JTIwb2YlMjBhJTIwdGFuJTIwYnVpbGRpbmcuJTIwVGhlJTIwYnVpbGRpbmclMjBoYXMlMjBhJTIwYnJvd24lMjBkb29yJTIwYW5kJTIwdHdvJTIwYnJvd24lMjB3aW5kb3dzLiUyMFRoZSUyMGNhciUyMGlzJTIwYSUyMHR3byUyMGRvb3IlMjBhbmQlMjB0aGUlMjBkb29yJTIwaXMlMjBjbG9zZWQuJTIwVGhlJTIwZ3JlZW4lMjBjYXIlMjBoYXMlMjBibGFjayUyMHRpcmVzLiclMjAlN0Q=",highlighted:`<span class="hljs-keyword">import</span> {
<span class="hljs-title class_">Florence2ForConditionalGeneration</span>,
<span class="hljs-title class_">AutoProcessor</span>,
<span class="hljs-title class_">AutoTokenizer</span>,
<span class="hljs-title class_">RawImage</span>,
} <span class="hljs-keyword">from</span> <span class="hljs-string">&quot;@huggingface/transformers&quot;</span>;
<span class="hljs-comment">// Load model, processor, and tokenizer</span>
<span class="hljs-keyword">const</span> model_id = <span class="hljs-string">&quot;onnx-community/Florence-2-base-ft&quot;</span>;
<span class="hljs-keyword">const</span> model = <span class="hljs-keyword">await</span> <span class="hljs-title class_">Florence2ForConditionalGeneration</span>.<span class="hljs-title function_">from_pretrained</span>(
model_id,
{
<span class="hljs-attr">dtype</span>: {
<span class="hljs-attr">embed_tokens</span>: <span class="hljs-string">&quot;fp16&quot;</span>,
<span class="hljs-attr">vision_encoder</span>: <span class="hljs-string">&quot;fp16&quot;</span>,
<span class="hljs-attr">encoder_model</span>: <span class="hljs-string">&quot;q4&quot;</span>,
<span class="hljs-attr">decoder_model_merged</span>: <span class="hljs-string">&quot;q4&quot;</span>,
},
<span class="hljs-attr">device</span>: <span class="hljs-string">&quot;webgpu&quot;</span>,
},
);
<span class="hljs-keyword">const</span> processor = <span class="hljs-keyword">await</span> <span class="hljs-title class_">AutoProcessor</span>.<span class="hljs-title function_">from_pretrained</span>(model_id);
<span class="hljs-keyword">const</span> tokenizer = <span class="hljs-keyword">await</span> <span class="hljs-title class_">AutoTokenizer</span>.<span class="hljs-title function_">from_pretrained</span>(model_id);
<span class="hljs-comment">// Load image and prepare vision inputs</span>
<span class="hljs-keyword">const</span> url =
<span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg&quot;</span>;
<span class="hljs-keyword">const</span> image = <span class="hljs-keyword">await</span> <span class="hljs-title class_">RawImage</span>.<span class="hljs-title function_">fromURL</span>(url);
<span class="hljs-keyword">const</span> vision_inputs = <span class="hljs-keyword">await</span> <span class="hljs-title function_">processor</span>(image);
<span class="hljs-comment">// Specify task and prepare text inputs</span>
<span class="hljs-keyword">const</span> task = <span class="hljs-string">&quot;&lt;MORE_DETAILED_CAPTION&gt;&quot;</span>;
<span class="hljs-keyword">const</span> prompts = processor.<span class="hljs-title function_">construct_prompts</span>(task);
<span class="hljs-keyword">const</span> text_inputs = <span class="hljs-title function_">tokenizer</span>(prompts);
<span class="hljs-comment">// Generate text</span>
<span class="hljs-keyword">const</span> generated_ids = <span class="hljs-keyword">await</span> model.<span class="hljs-title function_">generate</span>({
...text_inputs,
...vision_inputs,
<span class="hljs-attr">max_new_tokens</span>: <span class="hljs-number">100</span>,
});
<span class="hljs-comment">// Decode generated text</span>
<span class="hljs-keyword">const</span> generated_text = tokenizer.<span class="hljs-title function_">batch_decode</span>(generated_ids, {
<span class="hljs-attr">skip_special_tokens</span>: <span class="hljs-literal">false</span>,
})[<span class="hljs-number">0</span>];
<span class="hljs-comment">// Post-process the generated text</span>
<span class="hljs-keyword">const</span> result = processor.<span class="hljs-title function_">post_process_generation</span>(
generated_text,
task,
image.<span class="hljs-property">size</span>,
);
<span class="hljs-variable language_">console</span>.<span class="hljs-title function_">log</span>(result);
<span class="hljs-comment">// { &#x27;&lt;MORE_DETAILED_CAPTION&gt;&#x27;: &#x27;A green car is parked in front of a tan building. The building has a brown door and two brown windows. The car is a two door and the door is closed. The green car has black tires.&#x27; }</span>`,wrap:!1}}),X=new Xs({props:{source:"https://github.com/huggingface/transformers.js/blob/main/packages/transformers/docs/source/guides/dtypes.md"}}),{c(){J=p("meta"),Y=t(),x=p("p"),V=t(),i(h.$$.fragment),$=t(),i(w.$$.fragment),A=t(),U=p("p"),U.innerHTML=hs,H=t(),b=p("p"),b.innerHTML=ws,S=t(),j=p("p"),j.innerHTML=Us,F=t(),i(f.$$.fragment),L=t(),g=p("p"),g.innerHTML=bs,D=t(),i(I.$$.fragment),P=t(),i(Z.$$.fragment),O=t(),B=p("p"),B.innerHTML=fs,K=t(),i(k.$$.fragment),ss=t(),Q=p("p"),Q.innerHTML=gs,ls=t(),C=p("p"),C.textContent=Is,es=t(),i(v.$$.fragment),as=t(),i(W.$$.fragment),ts=t(),N=p("p"),N.textContent=Zs,ns=t(),G=p("p"),G.innerHTML=Bs,ps=t(),i(R.$$.fragment),os=t(),u=p("p"),u.innerHTML=ks,is=t(),T=p("details"),_=p("summary"),_.textContent=Qs,Js=t(),i(q.$$.fragment),cs=t(),i(X.$$.fragment),Ms=t(),E=p("p"),this.h()},l(s){const l=Rs("svelte-u9bgzb",document.head);J=o(l,"META",{name:!0,content:!0}),l.forEach(e),Y=n(s),x=o(s,"P",{}),js(x).forEach(e),V=n(s),c(h.$$.fragment,s),$=n(s),c(w.$$.fragment,s),A=n(s),U=o(s,"P",{"data-svelte-h":!0}),M(U)!=="svelte-jvrgvt"&&(U.innerHTML=hs),H=n(s),b=o(s,"P",{"data-svelte-h":!0}),M(b)!=="svelte-vokppa"&&(b.innerHTML=ws),S=n(s),j=o(s,"P",{align:!0,"data-svelte-h":!0}),M(j)!=="svelte-10et6vw"&&(j.innerHTML=Us),F=n(s),c(f.$$.fragment,s),L=n(s),g=o(s,"P",{"data-svelte-h":!0}),M(g)!=="svelte-1emxfy4"&&(g.innerHTML=bs),D=n(s),c(I.$$.fragment,s),P=n(s),c(Z.$$.fragment,s),O=n(s),B=o(s,"P",{"data-svelte-h":!0}),M(B)!=="svelte-1cui6je"&&(B.innerHTML=fs),K=n(s),c(k.$$.fragment,s),ss=n(s),Q=o(s,"P",{"data-svelte-h":!0}),M(Q)!=="svelte-11xu4iv"&&(Q.innerHTML=gs),ls=n(s),C=o(s,"P",{"data-svelte-h":!0}),M(C)!=="svelte-miejk1"&&(C.textContent=Is),es=n(s),c(v.$$.fragment,s),as=n(s),c(W.$$.fragment,s),ts=n(s),N=o(s,"P",{"data-svelte-h":!0}),M(N)!=="svelte-1hza90f"&&(N.textContent=Zs),ns=n(s),G=o(s,"P",{"data-svelte-h":!0}),M(G)!=="svelte-1fk7m3h"&&(G.innerHTML=Bs),ps=n(s),c(R.$$.fragment,s),os=n(s),u=o(s,"P",{align:!0,"data-svelte-h":!0}),M(u)!=="svelte-fnk7kx"&&(u.innerHTML=ks),is=n(s),T=o(s,"DETAILS",{});var ds=js(T);_=o(ds,"SUMMARY",{"data-svelte-h":!0}),M(_)!=="svelte-1lrja8l"&&(_.textContent=Qs),Js=n(ds),c(q.$$.fragment,ds),ds.forEach(e),cs=n(s),c(X.$$.fragment,s),Ms=n(s),E=o(s,"P",{}),js(E).forEach(e),this.h()},h(){ys(J,"name","hf:doc:metadata"),ys(J,"content",xs),ys(j,"align","center"),ys(u,"align","middle")},m(s,l){us(document.head,J),a(s,Y,l),a(s,x,l),a(s,V,l),r(h,s,l),a(s,$,l),r(w,s,l),a(s,A,l),a(s,U,l),a(s,H,l),a(s,b,l),a(s,S,l),a(s,j,l),a(s,F,l),r(f,s,l),a(s,L,l),a(s,g,l),a(s,D,l),r(I,s,l),a(s,P,l),r(Z,s,l),a(s,O,l),a(s,B,l),a(s,K,l),r(k,s,l),a(s,ss,l),a(s,Q,l),a(s,ls,l),a(s,C,l),a(s,es,l),r(v,s,l),a(s,as,l),r(W,s,l),a(s,ts,l),a(s,N,l),a(s,ns,l),a(s,G,l),a(s,ps,l),r(R,s,l),a(s,os,l),a(s,u,l),a(s,is,l),a(s,T,l),us(T,_),us(T,Js),r(q,T,null),a(s,cs,l),r(X,s,l),a(s,Ms,l),a(s,E,l),rs=!0},p:vs,i(s){rs||(d(h.$$.fragment,s),d(w.$$.fragment,s),d(f.$$.fragment,s),d(I.$$.fragment,s),d(Z.$$.fragment,s),d(k.$$.fragment,s),d(v.$$.fragment,s),d(W.$$.fragment,s),d(R.$$.fragment,s),d(q.$$.fragment,s),d(X.$$.fragment,s),rs=!0)},o(s){y(h.$$.fragment,s),y(w.$$.fragment,s),y(f.$$.fragment,s),y(I.$$.fragment,s),y(Z.$$.fragment,s),y(k.$$.fragment,s),y(v.$$.fragment,s),y(W.$$.fragment,s),y(R.$$.fragment,s),y(q.$$.fragment,s),y(X.$$.fragment,s),rs=!1},d(s){s&&(e(Y),e(x),e(V),e($),e(A),e(U),e(H),e(b),e(S),e(j),e(F),e(L),e(g),e(D),e(P),e(O),e(B),e(K),e(ss),e(Q),e(ls),e(C),e(es),e(as),e(ts),e(N),e(ns),e(G),e(ps),e(os),e(u),e(is),e(T),e(cs),e(Ms),e(E)),e(J),m(h,s),m(w,s),m(f,s),m(I,s),m(Z,s),m(k,s),m(v,s),m(W,s),m(R,s),m(q),m(X,s)}}}const xs='{"title":"Using quantized models (dtypes)","local":"using-quantized-models-dtypes","sections":[{"title":"Basic usage","local":"basic-usage","sections":[],"depth":2},{"title":"Detecting available dtypes","local":"detecting-available-dtypes","sections":[],"depth":2},{"title":"Per-module dtypes","local":"per-module-dtypes","sections":[],"depth":2}],"depth":1}';function Es(Ts){return Ws(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class As extends Ns{constructor(J){super(),Gs(this,J,Es,_s,Cs,{})}}export{As as component};

Xet Storage Details

Size:
23.6 kB
·
Xet hash:
462d373ff6699b7e7bf00573e15fb5312bb0a85683fba24c9d663002a07efcad

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.