Buckets:

hf-doc-build/doc-dev / kernels /pr_520 /en /builder /agents-guide.html
download
raw
49.1 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Writing custom kernels with code agents&quot;,&quot;local&quot;:&quot;writing-custom-kernels-with-code-agents&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Before you start&quot;,&quot;local&quot;:&quot;before-you-start&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;1. Give the agent a precise task prompt&quot;,&quot;local&quot;:&quot;1-give-the-agent-a-precise-task-prompt&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;2. Verify that the agent produces a complete kernel project&quot;,&quot;local&quot;:&quot;2-verify-that-the-agent-produces-a-complete-kernel-project&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;3. Review the generated files&quot;,&quot;local&quot;:&quot;3-review-the-generated-files&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;build.toml&quot;,&quot;local&quot;:&quot;buildtoml&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Torch bindings&quot;,&quot;local&quot;:&quot;torch-bindings&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Model integration code&quot;,&quot;local&quot;:&quot;model-integration-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;5. Build and test, and benchmark&quot;,&quot;local&quot;:&quot;5-build-and-test-and-benchmark&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;6. Benchmark&quot;,&quot;local&quot;:&quot;6-benchmark&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;7. Publish to the Hub&quot;,&quot;local&quot;:&quot;7-publish-to-the-hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/kernels/pr_520/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<link rel="modulepreload" href="/docs/kernels/pr_520/en/_app/immutable/entry/start.7813b24f.js">
<link rel="modulepreload" href="/docs/kernels/pr_520/en/_app/immutable/chunks/scheduler.f3b1e791.js">
<link rel="modulepreload" href="/docs/kernels/pr_520/en/_app/immutable/chunks/singletons.d3bd4a42.js">
<link rel="modulepreload" href="/docs/kernels/pr_520/en/_app/immutable/chunks/paths.6059ca6d.js">
<link rel="modulepreload" href="/docs/kernels/pr_520/en/_app/immutable/entry/app.d97547bc.js">
<link rel="modulepreload" href="/docs/kernels/pr_520/en/_app/immutable/chunks/preload-helper.4c8a338e.js">
<link rel="modulepreload" href="/docs/kernels/pr_520/en/_app/immutable/chunks/index.023a9934.js">
<link rel="modulepreload" href="/docs/kernels/pr_520/en/_app/immutable/nodes/0.884fe107.js">
<link rel="modulepreload" href="/docs/kernels/pr_520/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/kernels/pr_520/en/_app/immutable/nodes/5.ee56f531.js">
<link rel="modulepreload" href="/docs/kernels/pr_520/en/_app/immutable/chunks/CopyLLMTxtMenu.d8c1f5b0.js">
<link rel="modulepreload" href="/docs/kernels/pr_520/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.11da6958.js">
<link rel="modulepreload" href="/docs/kernels/pr_520/en/_app/immutable/chunks/CodeBlock.436ee8e3.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Writing custom kernels with code agents&quot;,&quot;local&quot;:&quot;writing-custom-kernels-with-code-agents&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Before you start&quot;,&quot;local&quot;:&quot;before-you-start&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;1. Give the agent a precise task prompt&quot;,&quot;local&quot;:&quot;1-give-the-agent-a-precise-task-prompt&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;2. Verify that the agent produces a complete kernel project&quot;,&quot;local&quot;:&quot;2-verify-that-the-agent-produces-a-complete-kernel-project&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;3. Review the generated files&quot;,&quot;local&quot;:&quot;3-review-the-generated-files&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;build.toml&quot;,&quot;local&quot;:&quot;buildtoml&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Torch bindings&quot;,&quot;local&quot;:&quot;torch-bindings&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Model integration code&quot;,&quot;local&quot;:&quot;model-integration-code&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;5. Build and test, and benchmark&quot;,&quot;local&quot;:&quot;5-build-and-test-and-benchmark&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;6. Benchmark&quot;,&quot;local&quot;:&quot;6-benchmark&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;7. Publish to the Hub&quot;,&quot;local&quot;:&quot;7-publish-to-the-hub&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" 
xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="writing-custom-kernels-with-code-agents" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#writing-custom-kernels-with-code-agents"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Writing custom kernels with code agents</span></h1> <p data-svelte-h="svelte-wx8riv">Code agents are a good fit for building custom kernels because the hard part is not just writing in Domain-Specific Languages (DSLs) like CUDA. You also need the right project layout, PyTorch bindings, architecture-specific choices, model-specific integration, and trustworthy benchmarks.</p> <p data-svelte-h="svelte-1jfibld">Kernels on Hugging Face are compatible with agents via skills and the <code>hf</code> CLI. The <code>cuda-kernels</code> and <code>rocm-kernels</code> skills contain knowledge so an agent can generate and publish a complete kernel project, instead of isolated snippets.</p> <p data-svelte-h="svelte-1tjv5at">This guide is for <strong>authoring new kernels</strong>. 
If you only want to <strong>load an existing precompiled kernel</strong>, use <code>get_kernel()</code> instead.</p> <h2 class="relative group"><a id="before-you-start" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#before-you-start"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Before you start</span></h2> <p data-svelte-h="svelte-fpocgd">You need:</p> <ul data-svelte-h="svelte-1dde4tg"><li>a coding agent that supports skills, such as Claude Code, Codex, Cursor, or OpenCode</li> <li>a clear target: library, model, operation, GPU, dtype, and representative shapes</li></ul> <p data-svelte-h="svelte-tuxf7">The skill currently focuses on NVIDIA GPUs such as <strong>H100</strong>, <strong>A100</strong>, and <strong>T4</strong>, and on integration patterns for <strong>transformers</strong> and <strong>diffusers</strong>.</p> <p data-svelte-h="svelte-1xhaxu6">Install the skill into your agent. 
If you need the latest version from <code>main</code>, use:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->cargo install --git https://github.com/huggingface/kernels hf-kernel-builder
<span class="hljs-meta prompt_">
# </span><span class="language-bash">Install your skills. Use --claude, --codex, or --opencode</span>
kernel-builder skills add --claude<!-- HTML_TAG_END --></pre></div> <blockquote class="note" data-svelte-h="svelte-1e86mut"><p>Check <a href="https://github.com/burtenshaw/kernel-skill/tree/main/examples/ltx_video" rel="nofollow">this example</a> to see what generated kernels look like.</p></blockquote> <h2 class="relative group"><a id="1-give-the-agent-a-precise-task-prompt" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#1-give-the-agent-a-precise-task-prompt"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>1. Give the agent a precise task prompt</span></h2> <p data-svelte-h="svelte-1l1j2mn">Writing kernels is a hard problem, so be specific to agents. 
A robust prompt will declare all core attributes, including:</p> <ul data-svelte-h="svelte-psks7k"><li>the library, for example <code>transformers</code> or <code>diffusers</code></li> <li>the model id, for example <code>Qwen3-8B</code> or <code>LTX-Video</code></li> <li>the operation, for example <code>RMSNorm</code>, attention, RoPE, <code>GEGLU</code>, or <code>AdaLN</code></li> <li>the target GPU, for example <code>H100</code>, <code>A100</code>, or <code>T4</code></li> <li>the dtype, for example <code>bfloat16</code>, <code>float16</code>, or <code>float32</code></li> <li>the outputs you expect: kernel code, bindings, tests, and benchmarks</li></ul> <p data-svelte-h="svelte-xhu8j0">In practice, you can often skip some of these and the agent will infer based on common practice, but if you know a detail declare it.</p> <p data-svelte-h="svelte-1gkqha7">For example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; 
border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">Build </span>a vectorized RMSNorm kernel for H100 targeting Qwen3<span class="hljs-number">-8</span>B in transformers.
Create the full kernel-<span class="hljs-keyword">builder </span>project, PyTorch <span class="hljs-keyword">bindings, </span>correctness tests, <span class="hljs-keyword">and </span><span class="hljs-keyword">benchmark </span><span class="hljs-keyword">scripts.</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-9uonxv">Or for diffusers:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->Build <span class="hljs-keyword">an</span> H100 RMSNorm kernel <span class="hljs-keyword">for</span> LTX-Video <span class="hljs-keyword">in</span> diffusers.
Patch <span class="hljs-keyword">the</span> pipeline correctly, benchmark <span class="hljs-keyword">it</span> against <span class="hljs-keyword">the</span> PyTorch baseline, <span class="hljs-keyword">and</span> report <span class="hljs-keyword">end</span>-<span class="hljs-built_in">to</span>-<span class="hljs-function"><span class="hljs-keyword">end</span> <span class="hljs-title">impact</span>.</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-136z17x">If you prefer, you can first scaffold a project with <code>kernel-builder init --name &lt;org&gt;/&lt;kernel&gt;</code> and then ask the agent to fill in the implementation.</p> <h2 class="relative group"><a id="2-verify-that-the-agent-produces-a-complete-kernel-project" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#2-verify-that-the-agent-produces-a-complete-kernel-project"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>2. Verify that the agent produces a complete kernel project</span></h2> <p data-svelte-h="svelte-nvltch">A useful result is a full <code>kernel-builder</code> project, not just a <code>.cu</code> file. 
The exact layout can vary, but it should include at least:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->examples<span class="hljs-regexp">/your_model/</span>
├── kernel_src/
│ └── rmsnorm.cu <span class="hljs-comment"># Vectorized CUDA kernel</span>
├── torch-ext/
│ ├── your_kernels/__init__.py
│ └── torch_binding.cpp <span class="hljs-comment"># PyTorch C++ bindings</span>
├── benchmark_rmsnorm.py <span class="hljs-comment"># Micro-benchmark script</span>
├── build.toml <span class="hljs-comment"># kernel-builder config</span>
├── setup.py <span class="hljs-comment"># pip install -e .</span>
└── pyproject.toml<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1qd6e3e">The agent skills contain example scripts to help you verify the project. So you can briefly test it yourself by running:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->Verify <span class="hljs-keyword">the</span> kernel project works <span class="hljs-keyword">with</span> <span class="hljs-keyword">a</span> transformers example.<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="3-review-the-generated-files" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#3-review-the-generated-files"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" 
aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>3. Review the generated files</span></h2> <p data-svelte-h="svelte-qc7s6y">Let’s dive deeper into the generated files, and explore how to validate the project.</p> <h3 class="relative group"><a id="buildtoml" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#buildtoml"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>build.toml</span></h3> <p data-svelte-h="svelte-19on78x">This is the main configuration file for <code>kernel-builder</code>. 
It tells <code>kernel-builder</code> what to build and how, so it should contain all the core information about your kernel project.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-section">[general]</span>
<span class="hljs-attr">name</span> = <span class="hljs-string">&quot;your_kernels&quot;</span>
<span class="hljs-attr">backends</span> = [<span class="hljs-string">&quot;cuda&quot;</span>]
<span class="hljs-attr">version</span> = <span class="hljs-number">1</span>
<span class="hljs-section">[torch]</span>
<span class="hljs-attr">src</span> = [<span class="hljs-string">&quot;torch-ext/torch_binding.cpp&quot;</span>]
<span class="hljs-section">[kernel.rmsnorm]</span>
<span class="hljs-attr">backend</span> = <span class="hljs-string">&quot;cuda&quot;</span>
<span class="hljs-attr">src</span> = [<span class="hljs-string">&quot;kernel_src/rmsnorm.cu&quot;</span>]
<span class="hljs-attr">depends</span> = [<span class="hljs-string">&quot;torch&quot;</span>]
<span class="hljs-attr">cuda-capabilities</span> = [<span class="hljs-string">&quot;9.0&quot;</span>] <span class="hljs-comment"># H100</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-a88ner">First check that:</p> <ul data-svelte-h="svelte-1iqpkrd"><li><code>backends = [&quot;cuda&quot;]</code> is correct for your project</li> <li>the kernel source files are listed correctly</li> <li>the Torch binding sources are included under <code>[torch]</code></li> <li><code>cuda-capabilities</code> is only set when the kernel truly targets specific architectures</li></ul> <p data-svelte-h="svelte-1yjphxb">For architecture-specific kernels, typical capability values are:</p> <ul data-svelte-h="svelte-ezzwa8"><li>H100: <code>9.0</code></li> <li>A100: <code>8.0</code></li> <li>T4: <code>7.5</code></li></ul> <p data-svelte-h="svelte-l5sw96">If the kernel does <strong>not</strong> require a specific capability, the kernels docs recommend leaving <code>cuda-capabilities</code> unset so the builder can target all supported capabilities. In practice, you can prompt your agent to review the <code>build.toml</code> for excessive definitions. 
Agents have a tendency to over-specify capabilities.</p> <h3 class="relative group"><a id="torch-bindings" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#torch-bindings"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Torch bindings</span></h3> <p data-svelte-h="svelte-ajpoy6">The kernel should be registered as Torch ops in <code>torch-ext/torch_binding.cpp</code>, with declarations in a header and a small Python wrapper in <code>torch-ext/&lt;name&gt;/__init__.py</code>. 
This is what makes the kernel callable from Python and is the right foundation for <code>torch.compile</code> compatibility.</p> <h3 class="relative group"><a id="model-integration-code" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#model-integration-code"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Model integration code</span></h3> <p data-svelte-h="svelte-1frq87x">Make sure the integration matches the library:</p> <ul data-svelte-h="svelte-fkol0c"><li><strong>transformers</strong>: patch the target modules directly, often RMSNorm modules whose class name contains <code>RMSNorm</code></li> <li><strong>diffusers</strong>: inspect the actual pipeline structure before patching, because modules and attention processors can differ across pipelines</li></ul> <blockquote class="note" data-svelte-h="svelte-1rkvfp"><p>One common issue is that the agent will not integrate the kernel at all. 
This typically happens because the project’s context is so long.</p></blockquote> <p data-svelte-h="svelte-1n5kowm">A few patterns matter in practice for the integration code:</p> <ul data-svelte-h="svelte-5lzvco"><li>In <strong>transformers</strong>, RMSNorm modules generally have weights, but epsilon may be exposed as <code>variance_epsilon</code> or <code>eps</code> depending on the model.</li> <li>In <strong>diffusers</strong>, some RMSNorm modules may have <code>weight=None</code>, so the integration code needs to handle both weighted and unweighted cases.</li> <li>In <strong>diffusers</strong>, checking <code>type(module).__name__</code> is often more reliable than <code>isinstance(...)</code> for matching RMSNorm modules across implementations.</li> <li>If a diffusers pipeline uses CPU offloading, inject custom kernels <strong>before</strong> enabling offload.</li></ul> <p data-svelte-h="svelte-10mo6mc">For attention, prefer the model library’s existing optimized path when one already exists. 
For example, in <code>transformers</code>, Flash Attention 2 is usually the right baseline for attention, while custom kernels are especially useful for operations like RMSNorm and other targeted hotspots.</p> <h2 class="relative group"><a id="5-build-and-test-and-benchmark" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#5-build-and-test-and-benchmark"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>5. Build and test, and benchmark</span></h2> <p data-svelte-h="svelte-rncryh">Kernel Hub kernels must support all recent PyTorch and CUDA configurations. The kernel-builder Nix flake handles this automatically. 
Copy the <a href="https://github.com/huggingface/kernels/blob/main/builder/examples/relu/flake.nix" rel="nofollow">example <code>flake.nix</code></a> into your project and run:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->nix flake update
nix run .#build-and-copy -L<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1jmhax9">This builds the kernel for every required PyTorch/CUDA variant and places the results in <code>build/</code>. For faster builds, enable the HuggingFace Nix cache:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->nix run nixpkgs#cachix -- use huggingface<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="6-benchmark" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#6-benchmark"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path 
d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>6. Benchmark</span></h2> <p data-svelte-h="svelte-102wbab">There are two main benchmarks to consider:</p> <ol data-svelte-h="svelte-2mf30q"><li>an isolated kernel micro-benchmark</li> <li>an end-to-end benchmark in the real model or pipeline</li></ol> <p data-svelte-h="svelte-qsn61q">The agent will generate both benchmarks based on the agent skills examples, typically as a script called <code>benchmark_example.py</code>. If you have access to the target hardware, you can run it to verify the kernel works. 
For example, the agent will generate a table like this:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->| Shape | Custom (ms) | PyTorch (ms) | Speedup |
| :---- | :---: | :---: | :---: |
| [1x128x4096] | 0.040 | 0.062 | <span class="hljs-strong">**1.58x**</span> |
| [1x512x4096] | 0.038 | 0.064 | <span class="hljs-strong">**1.69x**</span> |
| [1x1024x4096] | 0.037 | 0.071 | <span class="hljs-strong">**1.90x**</span> |
| [1x2048x4096] | 0.045 | 0.091 | <span class="hljs-strong">**2.03x**</span> |
| [1x4096x4096] | 0.071 | 0.150 | <span class="hljs-strong">**2.12x**</span> |
| [4x512x4096] | 0.056 | 0.093 | <span class="hljs-strong">**1.67x**</span> |
| [8x256x4096] | 0.045 | 0.092 | <span class="hljs-strong">**2.06x**</span> |
| [1x8192x4096] | 0.109 | 0.269 | <span class="hljs-strong">**2.47x**</span> |<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-150j4fc">Interpret the results carefully. A kernel can show a large isolated speedup but only a modest end-to-end gain if that operation is a small fraction of total runtime. In the LTX-Video example from <a href="https://huggingface.co/blog/custom-cuda-kernels-agent-skills" rel="nofollow">the blog we wrote</a>, the generated RMSNorm kernel improved the isolated benchmark by about <strong>1.88x</strong> on average, but end-to-end video generation improved by about <strong>6%</strong>, which matched the fact that RMSNorm accounted for only a small share of total compute.</p> <h2 class="relative group"><a id="7-publish-to-the-hub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#7-publish-to-the-hub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>7. Publish to the Hub</span></h2> <p data-svelte-h="svelte-ru08f4">Once the project is correct and benchmarked, you can build Hub-compatible artifacts and upload them. 
For this, you should first push to the Hub using the <code>hf</code> CLI tool:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta prompt_"># </span><span class="language-bash">install the hf CLI tool</span>
hf skills add
<span class="hljs-meta prompt_">
# </span><span class="language-bash">Authenticate</span>
hf auth login
<span class="hljs-meta prompt_">
# </span><span class="language-bash">Push to the Hub</span>
&lt;agent-prompt&gt;
Push the kernel to the Hub.
&lt;/agent-prompt&gt;<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ne6ecm">Or, you can manually create the repository and upload the artifacts:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta prompt_"># </span><span class="language-bash">Create the repository</span>
hf repo create your-org/your-kernel --type model
<span class="hljs-meta prompt_">
# </span><span class="language-bash">Upload the artifacts</span>
<span class="hljs-meta prompt_"># </span><span class="language-bash">Run inside the main kernel directory, <span class="hljs-built_in">where</span> build/ is.</span>
kernel-builder upload<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-d41u1v">After pushing to the Hub, users can load the kernel without compiling:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> kernels <span class="hljs-keyword">import</span> get_kernel
kernel = get_kernel(<span class="hljs-string">&quot;your-org/your-kernel&quot;</span>, version=<span class="hljs-number">1</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-454bv6">Well done! You have now built a custom kernel and published it to the Hub.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/kernels/blob/main/docs/source/builder/agents-guide.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>
<script>
{
// Client-side bootstrap for this page: publish the path configuration on a
// global, load the two entry modules, then call kit.start() on the element
// that contains this <script> tag.

// No let/const/var: in this classic (non-module) script the assignment
// creates or overwrites a global named __sveltekit_lhya45.
// NOTE(review): presumably read by the imported entry modules — confirm.
__sveltekit_lhya45 = {
// assets and base carry the same URL prefix for this build.
assets: "/docs/kernels/pr_520/en",
base: "/docs/kernels/pr_520/en",
env: {}
};
// Must be captured here, synchronously: document.currentScript is only set
// while this script is executing, so it would be null inside the .then()
// callback below.
const element = document.currentScript.parentElement;
// Two null entries, the same length as node_ids below.
// NOTE(review): presumably one slot per node, with null meaning no
// server-provided data — confirm against the SvelteKit version in use.
const data = [null,null];
// Request both entry modules in parallel; start only after both resolve.
Promise.all([
import("/docs/kernels/pr_520/en/_app/immutable/entry/start.7813b24f.js"),
import("/docs/kernels/pr_520/en/_app/immutable/entry/app.d97547bc.js")
]).then(([kit, app]) => {
// Hand the app module, the mount element and the initial state to start().
kit.start(app, element, {
node_ids: [0, 5],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
49.1 kB
·
Xet hash:
7011fba9334ed1538513a98254ed014691d3bbc62437477806dea4e0282d6f6d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.