Uploaded using `kernel-builder`.

Browse files

Files changed (16)

build/torch211-cxx11-cu128-x86_64-linux/__init__.py +17 -1
build/torch211-cxx11-cu128-x86_64-linux/{_flashrt_smallm_gemm_cuda_e9a1fe0.abi3.so → _flashrt_smallm_gemm_cuda_c4d802d.abi3.so} +1 -1
build/torch211-cxx11-cu128-x86_64-linux/_ops.py +3 -3
build/torch211-cxx11-cu128-x86_64-linux/metadata.json +1 -1
build/torch211-cxx11-cu130-x86_64-linux/__init__.py +17 -1
build/torch211-cxx11-cu130-x86_64-linux/{_flashrt_smallm_gemm_cuda_e9a1fe0.abi3.so → _flashrt_smallm_gemm_cuda_c4d802d.abi3.so} +1 -1
build/torch211-cxx11-cu130-x86_64-linux/_ops.py +3 -3
build/torch211-cxx11-cu130-x86_64-linux/metadata.json +1 -1
build/torch212-cxx11-cu130-x86_64-linux/__init__.py +17 -1
build/torch212-cxx11-cu130-x86_64-linux/{_flashrt_smallm_gemm_cuda_e9a1fe0.abi3.so → _flashrt_smallm_gemm_cuda_c4d802d.abi3.so} +1 -1
build/torch212-cxx11-cu130-x86_64-linux/_ops.py +3 -3
build/torch212-cxx11-cu130-x86_64-linux/metadata.json +1 -1
build/torch212-cxx11-cu132-x86_64-linux/__init__.py +17 -1
build/torch212-cxx11-cu132-x86_64-linux/{_flashrt_smallm_gemm_cuda_e9a1fe0.abi3.so → _flashrt_smallm_gemm_cuda_c4d802d.abi3.so} +1 -1
build/torch212-cxx11-cu132-x86_64-linux/_ops.py +3 -3
build/torch212-cxx11-cu132-x86_64-linux/metadata.json +1 -1

build/torch211-cxx11-cu128-x86_64-linux/__init__.py CHANGED Viewed

@@ -6,7 +6,23 @@ from typing import Optional
 import torch
-from ._ops import ops
 def nvfp4_w4a4_decode_matvec_bf16out(

 import torch
+from ._ops import add_op_namespace_prefix, ops
+@torch.library.register_fake(add_op_namespace_prefix("nvfp4_w4a4_decode_matvec_bf16out"))
+def _nvfp4_w4a4_decode_matvec_bf16out_fake(
+    a_packed: torch.Tensor,
+    b_packed: torch.Tensor,
+    sfa: torch.Tensor,
+    sfb: torch.Tensor,
+    out: torch.Tensor,
+    alpha: float = 1.0,
+) -> None:
+    if b_packed.dim() != 2:
+        raise RuntimeError("b_packed must have shape (N, K / 2)")
+    if out.shape != (b_packed.shape[0],):
+        raise RuntimeError("out shape must be (b_packed.shape[0],)")
+    return None
 def nvfp4_w4a4_decode_matvec_bf16out(

build/torch211-cxx11-cu128-x86_64-linux/{_flashrt_smallm_gemm_cuda_e9a1fe0.abi3.so → _flashrt_smallm_gemm_cuda_c4d802d.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f3dab9620683469830bc894b374cbceb448bfd36a61b3a6a49d081a13ea7c0d2
 size 120640

 version https://git-lfs.github.com/spec/v1
+oid sha256:ab7ac5141c17b2474cfa6383607064c81284d9f155a42a847bf2df2e098c30b8
 size 120640

build/torch211-cxx11-cu128-x86_64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flashrt_smallm_gemm_cuda_e9a1fe0
-ops = torch.ops._flashrt_smallm_gemm_cuda_e9a1fe0
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flashrt_smallm_gemm_cuda_e9a1fe0::{op_name}"

 import torch
+from . import _flashrt_smallm_gemm_cuda_c4d802d
+ops = torch.ops._flashrt_smallm_gemm_cuda_c4d802d
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flashrt_smallm_gemm_cuda_c4d802d::{op_name}"

build/torch211-cxx11-cu128-x86_64-linux/metadata.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "flashrt-smallm-gemm",
-  "id": "_flashrt_smallm_gemm_cuda_e9a1fe0",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

 {
   "name": "flashrt-smallm-gemm",
+  "id": "_flashrt_smallm_gemm_cuda_c4d802d",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

build/torch211-cxx11-cu130-x86_64-linux/__init__.py CHANGED Viewed

@@ -6,7 +6,23 @@ from typing import Optional
 import torch
-from ._ops import ops
 def nvfp4_w4a4_decode_matvec_bf16out(

 import torch
+from ._ops import add_op_namespace_prefix, ops
+@torch.library.register_fake(add_op_namespace_prefix("nvfp4_w4a4_decode_matvec_bf16out"))
+def _nvfp4_w4a4_decode_matvec_bf16out_fake(
+    a_packed: torch.Tensor,
+    b_packed: torch.Tensor,
+    sfa: torch.Tensor,
+    sfb: torch.Tensor,
+    out: torch.Tensor,
+    alpha: float = 1.0,
+) -> None:
+    if b_packed.dim() != 2:
+        raise RuntimeError("b_packed must have shape (N, K / 2)")
+    if out.shape != (b_packed.shape[0],):
+        raise RuntimeError("out shape must be (b_packed.shape[0],)")
+    return None
 def nvfp4_w4a4_decode_matvec_bf16out(

build/torch211-cxx11-cu130-x86_64-linux/{_flashrt_smallm_gemm_cuda_e9a1fe0.abi3.so → _flashrt_smallm_gemm_cuda_c4d802d.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b9a7f6a78cb3784d9efa7cde5349c220f183799254f23326aba75cd1accee4e
 size 122624

 version https://git-lfs.github.com/spec/v1
+oid sha256:53937bd54da1c1f312ba9489e82900e689d4fc71867b2066890443a0f6419d4a
 size 122624

build/torch211-cxx11-cu130-x86_64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flashrt_smallm_gemm_cuda_e9a1fe0
-ops = torch.ops._flashrt_smallm_gemm_cuda_e9a1fe0
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flashrt_smallm_gemm_cuda_e9a1fe0::{op_name}"

 import torch
+from . import _flashrt_smallm_gemm_cuda_c4d802d
+ops = torch.ops._flashrt_smallm_gemm_cuda_c4d802d
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flashrt_smallm_gemm_cuda_c4d802d::{op_name}"

build/torch211-cxx11-cu130-x86_64-linux/metadata.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "flashrt-smallm-gemm",
-  "id": "_flashrt_smallm_gemm_cuda_e9a1fe0",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

 {
   "name": "flashrt-smallm-gemm",
+  "id": "_flashrt_smallm_gemm_cuda_c4d802d",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

build/torch212-cxx11-cu130-x86_64-linux/__init__.py CHANGED Viewed

@@ -6,7 +6,23 @@ from typing import Optional
 import torch
-from ._ops import ops
 def nvfp4_w4a4_decode_matvec_bf16out(

 import torch
+from ._ops import add_op_namespace_prefix, ops
+@torch.library.register_fake(add_op_namespace_prefix("nvfp4_w4a4_decode_matvec_bf16out"))
+def _nvfp4_w4a4_decode_matvec_bf16out_fake(
+    a_packed: torch.Tensor,
+    b_packed: torch.Tensor,
+    sfa: torch.Tensor,
+    sfb: torch.Tensor,
+    out: torch.Tensor,
+    alpha: float = 1.0,
+) -> None:
+    if b_packed.dim() != 2:
+        raise RuntimeError("b_packed must have shape (N, K / 2)")
+    if out.shape != (b_packed.shape[0],):
+        raise RuntimeError("out shape must be (b_packed.shape[0],)")
+    return None
 def nvfp4_w4a4_decode_matvec_bf16out(

build/torch212-cxx11-cu130-x86_64-linux/{_flashrt_smallm_gemm_cuda_e9a1fe0.abi3.so → _flashrt_smallm_gemm_cuda_c4d802d.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a673a1f997be6ea22666a1bcbade96917cb8f05137797809376e30721bba547
 size 133544

 version https://git-lfs.github.com/spec/v1
+oid sha256:339477e934e35c4acb976a01cc5d806bb01e776975c799db6a02862a3c45b26f
 size 133544

build/torch212-cxx11-cu130-x86_64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flashrt_smallm_gemm_cuda_e9a1fe0
-ops = torch.ops._flashrt_smallm_gemm_cuda_e9a1fe0
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flashrt_smallm_gemm_cuda_e9a1fe0::{op_name}"

 import torch
+from . import _flashrt_smallm_gemm_cuda_c4d802d
+ops = torch.ops._flashrt_smallm_gemm_cuda_c4d802d
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flashrt_smallm_gemm_cuda_c4d802d::{op_name}"

build/torch212-cxx11-cu130-x86_64-linux/metadata.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "flashrt-smallm-gemm",
-  "id": "_flashrt_smallm_gemm_cuda_e9a1fe0",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

 {
   "name": "flashrt-smallm-gemm",
+  "id": "_flashrt_smallm_gemm_cuda_c4d802d",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

build/torch212-cxx11-cu132-x86_64-linux/__init__.py CHANGED Viewed

@@ -6,7 +6,23 @@ from typing import Optional
 import torch
-from ._ops import ops
 def nvfp4_w4a4_decode_matvec_bf16out(

 import torch
+from ._ops import add_op_namespace_prefix, ops
+@torch.library.register_fake(add_op_namespace_prefix("nvfp4_w4a4_decode_matvec_bf16out"))
+def _nvfp4_w4a4_decode_matvec_bf16out_fake(
+    a_packed: torch.Tensor,
+    b_packed: torch.Tensor,
+    sfa: torch.Tensor,
+    sfb: torch.Tensor,
+    out: torch.Tensor,
+    alpha: float = 1.0,
+) -> None:
+    if b_packed.dim() != 2:
+        raise RuntimeError("b_packed must have shape (N, K / 2)")
+    if out.shape != (b_packed.shape[0],):
+        raise RuntimeError("out shape must be (b_packed.shape[0],)")
+    return None
 def nvfp4_w4a4_decode_matvec_bf16out(

build/torch212-cxx11-cu132-x86_64-linux/{_flashrt_smallm_gemm_cuda_e9a1fe0.abi3.so → _flashrt_smallm_gemm_cuda_c4d802d.abi3.so} RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6c6eeb21bcf2331def3c8f94f2b5af15d209f655475ecb07afe603c0be36eac
 size 133544

 version https://git-lfs.github.com/spec/v1
+oid sha256:7351747dda516eb5d4512cccc9506ab7c5e86d118dc477c90eb411f2e3b03b7e
 size 133544

build/torch212-cxx11-cu132-x86_64-linux/_ops.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import torch
-from . import _flashrt_smallm_gemm_cuda_e9a1fe0
-ops = torch.ops._flashrt_smallm_gemm_cuda_e9a1fe0
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
-    return f"_flashrt_smallm_gemm_cuda_e9a1fe0::{op_name}"

 import torch
+from . import _flashrt_smallm_gemm_cuda_c4d802d
+ops = torch.ops._flashrt_smallm_gemm_cuda_c4d802d
 def add_op_namespace_prefix(op_name: str):
     """
     Prefix op by namespace.
     """
+    return f"_flashrt_smallm_gemm_cuda_c4d802d::{op_name}"

build/torch212-cxx11-cu132-x86_64-linux/metadata.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "flashrt-smallm-gemm",
-  "id": "_flashrt_smallm_gemm_cuda_e9a1fe0",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],

 {
   "name": "flashrt-smallm-gemm",
+  "id": "_flashrt_smallm_gemm_cuda_c4d802d",
   "version": 1,
   "license": "Apache-2.0",
   "python-depends": [],