liangsu9988 committed on
Commit
cc065aa
·
verified ·
1 Parent(s): fcc5ab1

Uploaded using `kernel-builder`.

Browse files
build/torch211-cxx11-cu128-x86_64-linux/__init__.py CHANGED
@@ -6,7 +6,23 @@ from typing import Optional
6
 
7
  import torch
8
 
9
- from ._ops import ops
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  def nvfp4_w4a4_decode_matvec_bf16out(
 
6
 
7
  import torch
8
 
9
+ from ._ops import add_op_namespace_prefix, ops
10
+
11
+
12
+ @torch.library.register_fake(add_op_namespace_prefix("nvfp4_w4a4_decode_matvec_bf16out"))
13
+ def _nvfp4_w4a4_decode_matvec_bf16out_fake(
14
+ a_packed: torch.Tensor,
15
+ b_packed: torch.Tensor,
16
+ sfa: torch.Tensor,
17
+ sfb: torch.Tensor,
18
+ out: torch.Tensor,
19
+ alpha: float = 1.0,
20
+ ) -> None:
21
+ if b_packed.dim() != 2:
22
+ raise RuntimeError("b_packed must have shape (N, K / 2)")
23
+ if out.shape != (b_packed.shape[0],):
24
+ raise RuntimeError("out shape must be (b_packed.shape[0],)")
25
+ return None
26
 
27
 
28
  def nvfp4_w4a4_decode_matvec_bf16out(
build/torch211-cxx11-cu128-x86_64-linux/{_flashrt_smallm_gemm_cuda_e9a1fe0.abi3.so → _flashrt_smallm_gemm_cuda_c4d802d.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3dab9620683469830bc894b374cbceb448bfd36a61b3a6a49d081a13ea7c0d2
3
  size 120640
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab7ac5141c17b2474cfa6383607064c81284d9f155a42a847bf2df2e098c30b8
3
  size 120640
build/torch211-cxx11-cu128-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flashrt_smallm_gemm_cuda_e9a1fe0
3
- ops = torch.ops._flashrt_smallm_gemm_cuda_e9a1fe0
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flashrt_smallm_gemm_cuda_e9a1fe0::{op_name}"
 
1
  import torch
2
+ from . import _flashrt_smallm_gemm_cuda_c4d802d
3
+ ops = torch.ops._flashrt_smallm_gemm_cuda_c4d802d
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flashrt_smallm_gemm_cuda_c4d802d::{op_name}"
build/torch211-cxx11-cu128-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flashrt-smallm-gemm",
3
- "id": "_flashrt_smallm_gemm_cuda_e9a1fe0",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
 
1
  {
2
  "name": "flashrt-smallm-gemm",
3
+ "id": "_flashrt_smallm_gemm_cuda_c4d802d",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
build/torch211-cxx11-cu130-x86_64-linux/__init__.py CHANGED
@@ -6,7 +6,23 @@ from typing import Optional
6
 
7
  import torch
8
 
9
- from ._ops import ops
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  def nvfp4_w4a4_decode_matvec_bf16out(
 
6
 
7
  import torch
8
 
9
+ from ._ops import add_op_namespace_prefix, ops
10
+
11
+
12
+ @torch.library.register_fake(add_op_namespace_prefix("nvfp4_w4a4_decode_matvec_bf16out"))
13
+ def _nvfp4_w4a4_decode_matvec_bf16out_fake(
14
+ a_packed: torch.Tensor,
15
+ b_packed: torch.Tensor,
16
+ sfa: torch.Tensor,
17
+ sfb: torch.Tensor,
18
+ out: torch.Tensor,
19
+ alpha: float = 1.0,
20
+ ) -> None:
21
+ if b_packed.dim() != 2:
22
+ raise RuntimeError("b_packed must have shape (N, K / 2)")
23
+ if out.shape != (b_packed.shape[0],):
24
+ raise RuntimeError("out shape must be (b_packed.shape[0],)")
25
+ return None
26
 
27
 
28
  def nvfp4_w4a4_decode_matvec_bf16out(
build/torch211-cxx11-cu130-x86_64-linux/{_flashrt_smallm_gemm_cuda_e9a1fe0.abi3.so → _flashrt_smallm_gemm_cuda_c4d802d.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b9a7f6a78cb3784d9efa7cde5349c220f183799254f23326aba75cd1accee4e
3
  size 122624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53937bd54da1c1f312ba9489e82900e689d4fc71867b2066890443a0f6419d4a
3
  size 122624
build/torch211-cxx11-cu130-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flashrt_smallm_gemm_cuda_e9a1fe0
3
- ops = torch.ops._flashrt_smallm_gemm_cuda_e9a1fe0
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flashrt_smallm_gemm_cuda_e9a1fe0::{op_name}"
 
1
  import torch
2
+ from . import _flashrt_smallm_gemm_cuda_c4d802d
3
+ ops = torch.ops._flashrt_smallm_gemm_cuda_c4d802d
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flashrt_smallm_gemm_cuda_c4d802d::{op_name}"
build/torch211-cxx11-cu130-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flashrt-smallm-gemm",
3
- "id": "_flashrt_smallm_gemm_cuda_e9a1fe0",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
 
1
  {
2
  "name": "flashrt-smallm-gemm",
3
+ "id": "_flashrt_smallm_gemm_cuda_c4d802d",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
build/torch212-cxx11-cu130-x86_64-linux/__init__.py CHANGED
@@ -6,7 +6,23 @@ from typing import Optional
6
 
7
  import torch
8
 
9
- from ._ops import ops
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  def nvfp4_w4a4_decode_matvec_bf16out(
 
6
 
7
  import torch
8
 
9
+ from ._ops import add_op_namespace_prefix, ops
10
+
11
+
12
+ @torch.library.register_fake(add_op_namespace_prefix("nvfp4_w4a4_decode_matvec_bf16out"))
13
+ def _nvfp4_w4a4_decode_matvec_bf16out_fake(
14
+ a_packed: torch.Tensor,
15
+ b_packed: torch.Tensor,
16
+ sfa: torch.Tensor,
17
+ sfb: torch.Tensor,
18
+ out: torch.Tensor,
19
+ alpha: float = 1.0,
20
+ ) -> None:
21
+ if b_packed.dim() != 2:
22
+ raise RuntimeError("b_packed must have shape (N, K / 2)")
23
+ if out.shape != (b_packed.shape[0],):
24
+ raise RuntimeError("out shape must be (b_packed.shape[0],)")
25
+ return None
26
 
27
 
28
  def nvfp4_w4a4_decode_matvec_bf16out(
build/torch212-cxx11-cu130-x86_64-linux/{_flashrt_smallm_gemm_cuda_e9a1fe0.abi3.so → _flashrt_smallm_gemm_cuda_c4d802d.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a673a1f997be6ea22666a1bcbade96917cb8f05137797809376e30721bba547
3
  size 133544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:339477e934e35c4acb976a01cc5d806bb01e776975c799db6a02862a3c45b26f
3
  size 133544
build/torch212-cxx11-cu130-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flashrt_smallm_gemm_cuda_e9a1fe0
3
- ops = torch.ops._flashrt_smallm_gemm_cuda_e9a1fe0
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flashrt_smallm_gemm_cuda_e9a1fe0::{op_name}"
 
1
  import torch
2
+ from . import _flashrt_smallm_gemm_cuda_c4d802d
3
+ ops = torch.ops._flashrt_smallm_gemm_cuda_c4d802d
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flashrt_smallm_gemm_cuda_c4d802d::{op_name}"
build/torch212-cxx11-cu130-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flashrt-smallm-gemm",
3
- "id": "_flashrt_smallm_gemm_cuda_e9a1fe0",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
 
1
  {
2
  "name": "flashrt-smallm-gemm",
3
+ "id": "_flashrt_smallm_gemm_cuda_c4d802d",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
build/torch212-cxx11-cu132-x86_64-linux/__init__.py CHANGED
@@ -6,7 +6,23 @@ from typing import Optional
6
 
7
  import torch
8
 
9
- from ._ops import ops
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  def nvfp4_w4a4_decode_matvec_bf16out(
 
6
 
7
  import torch
8
 
9
+ from ._ops import add_op_namespace_prefix, ops
10
+
11
+
12
+ @torch.library.register_fake(add_op_namespace_prefix("nvfp4_w4a4_decode_matvec_bf16out"))
13
+ def _nvfp4_w4a4_decode_matvec_bf16out_fake(
14
+ a_packed: torch.Tensor,
15
+ b_packed: torch.Tensor,
16
+ sfa: torch.Tensor,
17
+ sfb: torch.Tensor,
18
+ out: torch.Tensor,
19
+ alpha: float = 1.0,
20
+ ) -> None:
21
+ if b_packed.dim() != 2:
22
+ raise RuntimeError("b_packed must have shape (N, K / 2)")
23
+ if out.shape != (b_packed.shape[0],):
24
+ raise RuntimeError("out shape must be (b_packed.shape[0],)")
25
+ return None
26
 
27
 
28
  def nvfp4_w4a4_decode_matvec_bf16out(
build/torch212-cxx11-cu132-x86_64-linux/{_flashrt_smallm_gemm_cuda_e9a1fe0.abi3.so → _flashrt_smallm_gemm_cuda_c4d802d.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6c6eeb21bcf2331def3c8f94f2b5af15d209f655475ecb07afe603c0be36eac
3
  size 133544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7351747dda516eb5d4512cccc9506ab7c5e86d118dc477c90eb411f2e3b03b7e
3
  size 133544
build/torch212-cxx11-cu132-x86_64-linux/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _flashrt_smallm_gemm_cuda_e9a1fe0
3
- ops = torch.ops._flashrt_smallm_gemm_cuda_e9a1fe0
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_flashrt_smallm_gemm_cuda_e9a1fe0::{op_name}"
 
1
  import torch
2
+ from . import _flashrt_smallm_gemm_cuda_c4d802d
3
+ ops = torch.ops._flashrt_smallm_gemm_cuda_c4d802d
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_flashrt_smallm_gemm_cuda_c4d802d::{op_name}"
build/torch212-cxx11-cu132-x86_64-linux/metadata.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "name": "flashrt-smallm-gemm",
3
- "id": "_flashrt_smallm_gemm_cuda_e9a1fe0",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],
 
1
  {
2
  "name": "flashrt-smallm-gemm",
3
+ "id": "_flashrt_smallm_gemm_cuda_c4d802d",
4
  "version": 1,
5
  "license": "Apache-2.0",
6
  "python-depends": [],