Buckets:
| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Writing Hub kernels with kernel-builder","local":"writing-hub-kernels-with-kernel-builder","sections":[{"title":"Introduction","local":"introduction","sections":[],"depth":2},{"title":"Setting up environment","local":"setting-up-environment","sections":[{"title":"Quick install","local":"quick-install","sections":[],"depth":3},{"title":"Cloud environment","local":"cloud-environment","sections":[],"depth":3}],"depth":2},{"title":"Starting a new kernel","local":"starting-a-new-kernel","sections":[],"depth":2},{"title":"Kernel project layout","local":"kernel-project-layout","sections":[],"depth":2},{"title":"build.toml","local":"buildtoml","sections":[{"title":"general","local":"general","sections":[],"depth":3},{"title":"general.hub","local":"generalhub","sections":[],"depth":3},{"title":"general.cuda","local":"generalcuda","sections":[],"depth":3},{"title":"torch","local":"torch","sections":[],"depth":3},{"title":"kernel.&lt;name&gt;","local":"kernelltnamegt","sections":[{"title":"cuda","local":"cuda","sections":[],"depth":4},{"title":"rocm","local":"rocm","sections":[],"depth":4},{"title":"xpu","local":"xpu","sections":[],"depth":4}],"depth":3}],"depth":2},{"title":"Torch bindings","local":"torch-bindings","sections":[{"title":"Defining bindings","local":"defining-bindings","sections":[],"depth":3}],"depth":2},{"title":"Using kernel functions from Python","local":"using-kernel-functions-from-python","sections":[],"depth":2},{"title":"Kernel tests","local":"kernel-tests","sections":[],"depth":2},{"title":"Kernel docs","local":"kernel-docs","sections":[],"depth":2}],"depth":1}"> | |
| <link href="/docs/kernels/pr_463/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet"> | |
| <link rel="modulepreload" href="/docs/kernels/pr_463/en/_app/immutable/entry/start.6d13fe27.js"> | |
| <link rel="modulepreload" href="/docs/kernels/pr_463/en/_app/immutable/chunks/scheduler.f3b1e791.js"> | |
| <link rel="modulepreload" href="/docs/kernels/pr_463/en/_app/immutable/chunks/singletons.273ecdb6.js"> | |
| <link rel="modulepreload" href="/docs/kernels/pr_463/en/_app/immutable/chunks/paths.0f90e935.js"> | |
| <link rel="modulepreload" href="/docs/kernels/pr_463/en/_app/immutable/entry/app.8079e396.js"> | |
| <link rel="modulepreload" href="/docs/kernels/pr_463/en/_app/immutable/chunks/preload-helper.52e58b14.js"> | |
| <link rel="modulepreload" href="/docs/kernels/pr_463/en/_app/immutable/chunks/index.023a9934.js"> | |
| <link rel="modulepreload" href="/docs/kernels/pr_463/en/_app/immutable/nodes/0.88b84645.js"> | |
| <link rel="modulepreload" href="/docs/kernels/pr_463/en/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/kernels/pr_463/en/_app/immutable/nodes/11.a0bb2d1b.js"> | |
| <link rel="modulepreload" href="/docs/kernels/pr_463/en/_app/immutable/chunks/CopyLLMTxtMenu.1f02c0cb.js"> | |
| <link rel="modulepreload" href="/docs/kernels/pr_463/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.eae8da11.js"> | |
| <link rel="modulepreload" href="/docs/kernels/pr_463/en/_app/immutable/chunks/CodeBlock.480185e2.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Writing Hub kernels with kernel-builder","local":"writing-hub-kernels-with-kernel-builder","sections":[{"title":"Introduction","local":"introduction","sections":[],"depth":2},{"title":"Setting up environment","local":"setting-up-environment","sections":[{"title":"Quick install","local":"quick-install","sections":[],"depth":3},{"title":"Cloud environment","local":"cloud-environment","sections":[],"depth":3}],"depth":2},{"title":"Starting a new kernel","local":"starting-a-new-kernel","sections":[],"depth":2},{"title":"Kernel project layout","local":"kernel-project-layout","sections":[],"depth":2},{"title":"build.toml","local":"buildtoml","sections":[{"title":"general","local":"general","sections":[],"depth":3},{"title":"general.hub","local":"generalhub","sections":[],"depth":3},{"title":"general.cuda","local":"generalcuda","sections":[],"depth":3},{"title":"torch","local":"torch","sections":[],"depth":3},{"title":"kernel.&lt;name&gt;","local":"kernelltnamegt","sections":[{"title":"cuda","local":"cuda","sections":[],"depth":4},{"title":"rocm","local":"rocm","sections":[],"depth":4},{"title":"xpu","local":"xpu","sections":[],"depth":4}],"depth":3}],"depth":2},{"title":"Torch bindings","local":"torch-bindings","sections":[{"title":"Defining bindings","local":"defining-bindings","sections":[],"depth":3}],"depth":2},{"title":"Using kernel functions from Python","local":"using-kernel-functions-from-python","sections":[],"depth":2},{"title":"Kernel tests","local":"kernel-tests","sections":[],"depth":2},{"title":"Kernel docs","local":"kernel-docs","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: 
relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="writing-hub-kernels-with-kernel-builder" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#writing-hub-kernels-with-kernel-builder"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Writing Hub kernels with kernel-builder</span></h1> <h2 class="relative group"><a id="introduction" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#introduction"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Introduction</span></h2> <p data-svelte-h="svelte-1i1c9ni">The Kernel Hub allows Python libraries and 
applications to load compute | |
| kernels directly from the <a href="https://hf.co/" rel="nofollow">Hub</a>. To support this kind | |
| of dynamic loading, Hub kernels differ from traditional Python kernel | |
| packages in that they are made to be:</p> <ul data-svelte-h="svelte-1odnsz3"><li>Portable: a kernel can be loaded from paths outside <code>PYTHONPATH</code>.</li> <li>Unique: multiple versions of the same kernel can be loaded in the | |
| same Python process.</li> <li>Compatible: kernels must support all recent versions of Python and | |
| the different PyTorch build configurations (various CUDA versions | |
| and C++ ABIs). Furthermore, older C library versions must be supported.</li></ul> <p data-svelte-h="svelte-1gp9ycj"><code>kernel-builder</code> is a set of tools that can build conforming kernels. It | |
| takes care of:</p> <ul data-svelte-h="svelte-ysrhph"><li>Building kernels for all supported PyTorch configurations (C++98/11 ABIs and | |
| different CUDA versions).</li> <li>Compatibility with old glibc and libstdc++ versions, so that kernels also | |
| work on older Linux distributions.</li> <li>Registering Torch ops, such that multiple versions of the same kernel can be | |
| loaded without namespace conflicts.</li></ul> <p data-svelte-h="svelte-vy7wcb"><code>kernel-builder</code> builds are configured through a <code>build.toml</code> file. | |
| <code>build.toml</code> is a simple format that does not require intricate knowledge | |
| of CMake or setuptools.</p> <p data-svelte-h="svelte-o9ssdv">This page describes the directory layout of a kernel-builder project, the | |
| format of the <code>build.toml</code> file, and some additional Python glue that | |
| <code>kernel-builder</code> provides. We will use a <a href="https://github.com/huggingface/kernels/tree/main/examples/kernels/relu" rel="nofollow">simple ReLU kernel</a> | |
| as the running example. After reading this page, you may also want to have | |
| a look at the more realistic <a href="https://github.com/huggingface/kernels/tree/main/examples/kernels/relu-backprop-compile" rel="nofollow">ReLU kernel with backprop and <code>torch.compile</code></a> | |
| support.</p> <h2 class="relative group"><a id="setting-up-environment" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#setting-up-environment"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Setting up environment</span></h2> <h3 class="relative group"><a id="quick-install" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#quick-install"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 
11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Quick install</span></h3> <p data-svelte-h="svelte-96m8sj">The fastest way to get started is to run the install script. This | |
| installs <a href="https://docs.determinate.systems/determinate-nix/" rel="nofollow">Determinate Nix</a> | |
| and <code>kernel-builder</code> in a single command:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->curl -fsSL https://raw.githubusercontent.com/huggingface/kernels/main/install.sh | bash<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1b4vj4e">This will:</p> <ol data-svelte-h="svelte-3arzgs"><li>Install Determinate Nix (if not already installed).</li> <li>Configure the Hugging Face binary cache (to avoid building dependencies from | |
| source).</li> <li>Install <code>kernel-builder</code> via <code>nix profile install</code>.</li></ol> <p data-svelte-h="svelte-1ihjqjm">To update <code>kernel-builder</code> later:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->nix profile upgrade --all<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1cac215">For a step-by-step breakdown of what the script does, see | |
| <a href="nix">Using the kernel builder with Nix</a>.</p> <h3 class="relative group"><a id="cloud-environment" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#cloud-environment"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Cloud environment</span></h3> <p data-svelte-h="svelte-4lye9k">In the <a href="https://github.com/huggingface/kernels/tree/main/terraform" rel="nofollow"><code>terraform</code></a> directory, we provide an | |
| example of programmatically spinning up an EC2 instance that is ready | |
| with everything needed for you to start developing and building | |
| kernels.</p> <p data-svelte-h="svelte-g5i707">If you use a different provider, the Terraform bridges should be | |
| similar and straightforward to modify.</p> <h2 class="relative group"><a id="starting-a-new-kernel" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#starting-a-new-kernel"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Starting a new kernel</span></h2> <p data-svelte-h="svelte-cn9mkc">The easiest way to start a new kernel is by using the <code>init</code> subcommand | |
| of <code>kernel-builder</code>. This creates a minimal, compilable kernel:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->$ kernel-builder init --name myorg/mykernel | |
| Initialized `myorg/mykernel` at /home/daniel/git/kernels/examples/kernels/mykernel<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1prqyz3">This creates a kernel named <code>mykernel</code> in the directory <code>mykernel</code>. The | |
| kernel is configured to upload to the <code>myorg/mykernel</code> Hub | |
| repository when an upload command is used.</p> <p data-svelte-h="svelte-1exh4w8">By default, the <code>init</code> subcommand creates a CUDA kernel. You can specify | |
| another backend with the <code>--backends</code> option:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->$ kernel-builder init --name myorg/mykernel --backends xpu<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1poutmy">You can also make a multi-backend kernel by adding all the backends | |
| that you would like to support as arguments to <code>--backends</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->$ kernel-builder init --name myorg/mykernel --backends cuda xpu | |
| Initialized `myorg/mykernel` at /home/daniel/git/kernels/examples/kernels/mykernel<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-s84xza">Finally, if you want to create a kernel for all supported backends, you | |
| can use <code>--backends all</code>.</p> <h2 class="relative group"><a id="kernel-project-layout" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#kernel-project-layout"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Kernel project layout</span></h2> <p data-svelte-h="svelte-6oo725">Kernel projects follow this general directory layout:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none 
transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->mykernel | |
| ├── benchmarks | |
| │ └── benchmark.py | |
| ├── build.toml | |
| ├── CARD.md | |
| ├── example.py | |
| ├── flake.nix | |
| ├── mykernel_cuda | |
| │ └── mykernel.cu | |
| ├── tests | |
| │ ├── __init__.py | |
| │ └── test_mykernel.py | |
| └── torch-ext | |
| ├── mykernel | |
| │ └── __init__.py | |
| ├── torch_binding.cpp | |
| └── torch_binding.h<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1lqdx4">In this example we can find:</p> <ul data-svelte-h="svelte-c8vtob"><li>The build configuration in <code>build.toml</code>.</li> <li>One or more top-level directories containing kernels (<code>mykernel_cuda</code>).</li> <li>The <code>torch-ext</code> directory, which contains: | |
| <ul><li><code>torch_binding.h</code>: contains declarations for kernel entry points | |
| (from <code>kernel_a</code> and <code>kernel_b</code>).</li> <li><code>torch_binding.cpp</code>: registers the entry points as Torch ops.</li> <li><code>torch-ext/mykernel</code>: contains any Python wrapping the kernel needs. At the | |
| bare minimum, it should contain an <code>__init__.py</code> file.</li></ul></li> <li>Kernel tests in the directory <code>tests</code>.</li> <li>Benchmarks in the directory <code>benchmarks</code>.</li> <li>A kernel card template in <code>CARD.md</code>. The placeholders in the card are filled | |
| during the kernel build.</li> <li>The Nix flake configuration in <code>flake.nix</code>.</li> <li>An example script that uses the kernel in <code>example.py</code>.</li></ul> <h2 class="relative group"><a id="buildtoml" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#buildtoml"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>build.toml</span></h2> <p data-svelte-h="svelte-1hgp1cr"><code>build.toml</code> tells <code>kernel-builder</code> what to build and how. It looks as | |
| follows for the <code>mykernel</code> kernel:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-section">[general]</span> | |
| <span class="hljs-attr">backends</span> = [ | |
| <span class="hljs-string">"cuda"</span>, | |
| ] | |
| <span class="hljs-attr">name</span> = <span class="hljs-string">"mykernel"</span> | |
| <span class="hljs-attr">version</span> = <span class="hljs-number">1</span> | |
| <span class="hljs-section">[general.hub]</span> | |
| <span class="hljs-attr">repo-id</span> = <span class="hljs-string">"myorg/mykernel"</span> | |
| <span class="hljs-section">[torch]</span> | |
| <span class="hljs-attr">src</span> = [ | |
| <span class="hljs-string">"torch-ext/torch_binding.cpp"</span>, | |
| <span class="hljs-string">"torch-ext/torch_binding.h"</span>, | |
| ] | |
| <span class="hljs-section">[kernel.mykernel]</span> | |
| <span class="hljs-attr">backend</span> = <span class="hljs-string">"cuda"</span> | |
| <span class="hljs-attr">depends</span> = [<span class="hljs-string">"torch"</span>] | |
| <span class="hljs-attr">src</span> = [<span class="hljs-string">"mykernel_cuda/mykernel.cu"</span>] | |
| <span class="hljs-comment"># If the kernel is only supported on specific capabilities, set the</span> | |
| <span class="hljs-comment"># cuda-capabilities option:</span> | |
| <span class="hljs-comment">#</span> | |
| <span class="hljs-comment"># cuda-capabilities = [ "9.0", "10.0", "12.0" ]</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-pgruc9">The following sections enumerate all supported options for <code>build.toml</code>.</p> <h3 class="relative group"><a id="general" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#general"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>general</span></h3> <ul data-svelte-h="svelte-3ou1jk"><li><code>name</code> (required): the name of the kernel. The Python code for a Torch | |
| extension must be stored in <code>torch-ext/&lt;name&gt;</code>.</li> <li><code>version</code> (int, <strong>experimental</strong>): the major version of the kernel. | |
| The version is written to the kernel’s <code>metadata.json</code> and is used | |
| by the <code>kernels upload</code> command to upload the kernel to a version | |
| branch named <code>v&lt;version&gt;</code>.</li> <li><code>backends</code> (required): a list of supported backends. Must be one or | |
| more of <code>cpu</code>, <code>cuda</code>, <code>metal</code>, <code>rocm</code>, or <code>xpu</code>.</li> <li><code>python-depends</code> (<strong>experimental</strong>): a list of additional Python dependencies | |
| that the kernel requires. The only supported dependencies are <code>einops</code> | |
| and <code>nvidia-cutlass-dsl</code>.</li></ul> <h3 class="relative group"><a id="generalhub" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#generalhub"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>general.hub</span></h3> <ul data-svelte-h="svelte-12rq95i"><li><code>repo-id</code>: the Hub repository to upload the kernel to when the <code>upload</code> or | |
| <code>build-and-upload</code> subcommands of <code>kernel-builder</code> are used.</li></ul> <h3 class="relative group"><a id="generalcuda" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#generalcuda"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>general.cuda</span></h3> <ul data-svelte-h="svelte-1fzn2"><li><code>maxver</code>: the maximum CUDA toolkit version (inclusive). This option | |
| <em>must not</em> be set under normal circumstances, since it can exclude Torch | |
| build variants that are <a href="../kernel-requirements">required for compliant kernels</a>. | |
| This option is provided for kernels that cause compiler errors on | |
| newer CUDA toolkit versions.</li> <li><code>minver</code>: the minimum required CUDA toolkit version. This option | |
| <em>must not</em> be set under normal circumstances, since it can exclude Torch | |
| build variants that are <a href="../kernel-requirements">required for compliant kernels</a>. | |
| This option is provided for kernels that require functionality only | |
| provided by newer CUDA toolkits.</li></ul> <h3 class="relative group"><a id="torch" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#torch"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>torch</span></h3> <p data-svelte-h="svelte-3rear0">This section describes the Torch extension. In the future, there may be | |
| similar sections for other frameworks. This section has the following | |
| options:</p> <ul data-svelte-h="svelte-1079mx2"><li><code>src</code> (required): a list of source files and headers.</li> <li><code>pyext</code> (optional): the list of extensions for Python files. Default: | |
| <code>["py", "pyi"]</code>.</li> <li><code>include</code> (optional): include directories relative to the project root. | |
| Default: <code>[]</code>.</li> <li><code>maxver</code> (optional): only build for this Torch version and earlier. Use cautiously, since this option produces | |
| non-compliant kernels if the version range does not correspond to the <a href="build-variants">required variants</a>.</li> <li><code>minver</code> (optional): only build for this Torch version and later. Use cautiously, since this option produces | |
| non-compliant kernels if the version range does not correspond to the <a href="build-variants">required variants</a>.</li></ul> <h3 class="relative group"><a id="kernelltnamegt" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#kernelltnamegt"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>kernel.<name></span></h3> <p data-svelte-h="svelte-1weg0gu">Specification of a kernel with the name <code><name></code>. Multiple <code>kernel.<name></code> | |
| sections can be defined in the same <code>build.toml</code>. | |
| See <a href="https://huggingface.co/kernels-community/quantization/" rel="nofollow"><code>kernels-community/quantization</code></a> | |
| for an example with multiple kernel sections.</p> <p data-svelte-h="svelte-1al5myg">The following options can be set for a kernel:</p> <ul data-svelte-h="svelte-1fbzu1m"><li><code>backend</code> (required): the compute backend of the kernel. The currently | |
| supported backends are <code>cpu</code>, <code>cuda</code>, <code>metal</code>, <code>rocm</code>, and <code>xpu</code>. | |
| <strong>The <code>cpu</code> backend is currently experimental and might still change.</strong></li> <li><code>depends</code> (required): a list of dependencies. The supported dependencies | |
| are listed in <a href="https://github.com/huggingface/kernels/blob/main/builder/lib/deps.nix" rel="nofollow"><code>deps.nix</code></a>.</li> <li><code>src</code> (required): a list of source files and headers.</li> <li><code>include</code> (optional): include directories relative to the project root. | |
| Default: <code>[]</code>.</li></ul> <p data-svelte-h="svelte-1mf578x">Besides these shared options, the following backend-specific options | |
| are available:</p> <h4 class="relative group"><a id="cuda" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#cuda"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>cuda</span></h4> <ul data-svelte-h="svelte-1ym43dg"><li><code>cuda-capabilities</code> (optional): a list of CUDA capabilities that the | |
| kernel should be compiled for. When absent, the kernel will be built | |
| using all capabilities that the builder supports. The effective | |
| capabilities are the intersection of this list and the capabilities | |
| supported by the CUDA compiler. It is recommended to leave this option | |
| unspecified <strong>unless</strong> a kernel requires specific capabilities.</li> <li><code>cuda_flags</code> (optional): additional flags to be passed to <code>nvcc</code>. | |
| <strong>Warning</strong>: this option should only be used in exceptional circumstances. | |
| Custom compile flags can interfere with the build process or break | |
| compatibility requirements.</li></ul> <h4 class="relative group"><a id="rocm" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#rocm"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>rocm</span></h4> <ul data-svelte-h="svelte-mx7mxt"><li><code>rocm-archs</code>: a list of ROCm architectures that the kernel should be | |
| compiled for.</li></ul> <h4 class="relative group"><a id="xpu" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#xpu"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>xpu</span></h4> <ul data-svelte-h="svelte-10v9trt"><li><code>sycl_flags</code>: a list of additional flags to be passed to the SYCL | |
| compiler.</li></ul> <h2 class="relative group"><a id="torch-bindings" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#torch-bindings"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Torch bindings</span></h2> <h3 class="relative group"><a id="defining-bindings" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#defining-bindings"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 
11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Defining bindings</span></h3> <p data-svelte-h="svelte-r7rot5">Torch bindings are defined in C++, kernels commonly use two files:</p> <ul data-svelte-h="svelte-poxr41"><li><code>torch_binding.h</code> containing function declarations.</li> <li><code>torch_binding.cpp</code> registering the functions as Torch ops.</li></ul> <p data-svelte-h="svelte-1ikzb58">For instance, the <code>mykernel</code> kernel discussed above has the following | |
| declaration in <code>torch_binding.h</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">#<span class="hljs-keyword">pragma</span> once</span> | |
| <span class="hljs-meta">#<span class="hljs-keyword">include</span> <span class="hljs-string">&lt;torch/torch.h&gt;</span></span> | |
| <span class="hljs-function"><span class="hljs-type">void</span> <span class="hljs-title">mykernel</span><span class="hljs-params">(torch::Tensor &out, torch::Tensor <span class="hljs-type">const</span> &input)</span></span>;<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-5ja9lk">This function is then registered as a Torch op in <code>torch_binding.cpp</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">#<span class="hljs-keyword">include</span> <span class="hljs-string"><torch/library.h></span></span> | |
| <span class="hljs-meta">#<span class="hljs-keyword">include</span> <span class="hljs-string">"registration.h"</span></span> | |
| <span class="hljs-meta">#<span class="hljs-keyword">include</span> <span class="hljs-string">"torch_binding.h"</span></span> | |
| <span class="hljs-built_in">TORCH_LIBRARY_EXPAND</span>(TORCH_EXTENSION_NAME, ops) { | |
| ops.<span class="hljs-built_in">def</span>(<span class="hljs-string">"mykernel(Tensor! out, Tensor input) -> ()"</span>); | |
| <span class="hljs-meta">#<span class="hljs-keyword">if</span> defined(CUDA_KERNEL) || defined(ROCM_KERNEL)</span> | |
| ops.<span class="hljs-built_in">impl</span>(<span class="hljs-string">"mykernel"</span>, torch::kCUDA, &mykernel); | |
| <span class="hljs-meta">#<span class="hljs-keyword">endif</span></span> | |
| } | |
| <span class="hljs-built_in">REGISTER_EXTENSION</span>(TORCH_EXTENSION_NAME)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ssih46">This snippet uses macros from <code>registration.h</code> to register the function. | |
| <code>registration.h</code> is generated by <code>kernel-builder</code> itself. A function | |
| is registered through the <code>def</code>/<code>impl</code> methods. <code>def</code> specifies the | |
| function signature following the <a href="https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/README.md#func" rel="nofollow">function schema</a>. | |
| <code>impl</code> associates the function name with the C/C++ function and | |
| the applicable device.</p> <h2 class="relative group"><a id="using-kernel-functions-from-python" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#using-kernel-functions-from-python"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Using kernel functions from Python</span></h2> <p data-svelte-h="svelte-1y8rjsx">The bindings are typically wrapped in Python code in <code>torch-ext/&lt;name&gt;</code>. | |
| The native code is exposed under the <code>torch.ops</code> namespace. However, | |
| we add some unique material to the name of the extension to ensure that | |
| different versions of the same extension can be loaded at the same time. | |
| As a result, the extension is registered as | |
| <code>torch.ops.&lt;name&gt;_&lt;unique_material&gt;</code>.</p> <p data-svelte-h="svelte-a3yu8e">To deal with this uniqueness, <code>kernel-builder</code> generates a Python module | |
| named <code>_ops</code> that contains an alias for the name. This can be used to | |
| refer to the correct <code>torch.ops</code> module. For example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> typing <span class="hljs-keyword">import</span> <span class="hljs-type">Optional</span> | |
| <span class="hljs-keyword">import</span> torch | |
| <span class="hljs-keyword">from</span> ._ops <span class="hljs-keyword">import</span> ops | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">mykernel</span>(<span class="hljs-params">x: torch.Tensor, out: <span class="hljs-type">Optional</span>[torch.Tensor] = <span class="hljs-literal">None</span></span>) -> torch.Tensor: | |
| <span class="hljs-keyword">if</span> out <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>: | |
| out = torch.empty_like(x) | |
| ops.mykernel(out, x) | |
| <span class="hljs-keyword">return</span> out<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="kernel-tests" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#kernel-tests"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Kernel tests</span></h2> <p data-svelte-h="svelte-q979am">Kernel tests are stored in the <code>tests</code> directory. Since running all | |
| kernel tests in CI may be prohibitively expensive, the <code>pyproject.toml</code> | |
| generated by the builder adds support for the special <code>kernels_ci</code> | |
| PyTest marker that can be used as follows:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> pytest | |
| <span class="hljs-meta">@pytest.mark.kernels_ci</span> | |
| <span class="hljs-keyword">def</span> <span class="hljs-title function_">test_mykernel</span>(): | |
| ...<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1uzdod8">We recommend that you pick tests that together would catch most error | |
| cases while running within 60 seconds.</p> <p data-svelte-h="svelte-s01sgi">You can run the tests (e.g. in CI) using:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->$ nix run .<span class="hljs-comment">#ci-test</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ucbtrx">If the kernel supports multiple backends, it will run the test for the | |
| first supported backend that was found, obeying the following order: CUDA, | |
| ROCm, XPU, Metal, CPU. If you would like to run the tests for a specific build | |
| variant, you can use <code>nix run .#ciTests.&lt;variant&gt;</code>. For instance:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->$ nix run .<span class="hljs-comment">#ciTests.torch210-cxx11-cpu-x86_64-linux</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-gcopz4">When running the tests on non-NixOS systems, make sure that | |
| <a href="https://danieldk.eu/Software/Nix/Nix-CUDA-on-non-NixOS-systems#solutions" rel="nofollow">the CUDA driver library can be found</a>.</p> <h2 class="relative group"><a id="kernel-docs" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#kernel-docs"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Kernel docs</span></h2> <p data-svelte-h="svelte-11q3n4g">We provide a utility to generate a system card for a given kernel, utilizing | |
| information from its <code>build.toml</code> and metadata. This system card provides a | |
| reasonable starting point and is meant to be edited afterward by the kernel | |
| developer.</p> <p data-svelte-h="svelte-zjr5ot">The template card is generated as part of the <code>kernel-builder init</code> | |
| command and is serialized in the root directory of the kernel.</p> <p data-svelte-h="svelte-rt0n0n">The card will be filled in automatically by the builder when using the | |
| <code>build-and-upload</code> or <code>build-and-copy</code> command. It will be serialized | |
| to the <code>build</code> sub-directory inside the main kernel directory. It | |
| will be uploaded as <code>README.md</code> to the Hub.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/kernels/blob/main/docs/source/builder/writing-kernels.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { | |
| __sveltekit_q2anp7 = { | |
| assets: "/docs/kernels/pr_463/en", | |
| base: "/docs/kernels/pr_463/en", | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; | |
| const data = [null,null]; | |
| Promise.all([ | |
| import("/docs/kernels/pr_463/en/_app/immutable/entry/start.6d13fe27.js"), | |
| import("/docs/kernels/pr_463/en/_app/immutable/entry/app.8079e396.js") | |
| ]).then(([kit, app]) => { | |
| kit.start(app, element, { | |
| node_ids: [0, 11], | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size: 68.6 kB
- Xet hash: dc229b2238776c1098b2788f9b5aa3ff6e1dbed106087b26587298d43c3c4a1d
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.