Build uploaded using `kernels`.
Browse files- build/torch210-cxx11-cu126-aarch64-linux/__init__.py +3 -0
- build/{torch29-cxx11-cu128-aarch64-linux/quantization_eetq → torch210-cxx11-cu126-aarch64-linux}/_ops.py +3 -3
- build/{torch29-cxx11-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_ee9ac42.abi3.so → torch210-cxx11-cu126-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so} +2 -2
- build/{torch29-cxx11-cu126-aarch64-linux/quantization_eetq → torch210-cxx11-cu126-aarch64-linux}/custom_ops.py +0 -0
- build/torch210-cxx11-cu126-aarch64-linux/metadata.json +18 -0
- build/torch210-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py +26 -0
- build/torch210-cxx11-cu128-aarch64-linux/__init__.py +3 -0
- build/{torch29-cxx11-cu126-aarch64-linux/quantization_eetq → torch210-cxx11-cu128-aarch64-linux}/_ops.py +3 -3
- build/{torch29-cxx11-cu128-aarch64-linux/quantization_eetq/_quantization_eetq_ee9ac42.abi3.so → torch210-cxx11-cu128-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so} +2 -2
- build/{torch29-cxx11-cu128-aarch64-linux/quantization_eetq → torch210-cxx11-cu128-aarch64-linux}/custom_ops.py +0 -0
- build/torch210-cxx11-cu128-aarch64-linux/metadata.json +21 -0
- build/torch210-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py +26 -0
- build/torch29-cxx11-cu126-aarch64-linux/__init__.py +3 -0
- build/torch29-cxx11-cu126-aarch64-linux/_ops.py +9 -0
- build/torch29-cxx11-cu126-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so +3 -0
- build/torch29-cxx11-cu126-aarch64-linux/custom_ops.py +36 -0
- build/torch29-cxx11-cu126-aarch64-linux/metadata.json +18 -0
- build/torch29-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py +25 -2
- build/torch29-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch29-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch29-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc +0 -0
- build/torch29-cxx11-cu128-aarch64-linux/__init__.py +3 -0
- build/torch29-cxx11-cu128-aarch64-linux/_ops.py +9 -0
- build/torch29-cxx11-cu128-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so +3 -0
- build/torch29-cxx11-cu128-aarch64-linux/custom_ops.py +36 -0
- build/torch29-cxx11-cu128-aarch64-linux/metadata.json +21 -0
- build/torch29-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py +25 -2
- build/torch29-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc +0 -0
- build/torch29-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc +0 -0
- build/torch29-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc +0 -0
build/torch210-cxx11-cu126-aarch64-linux/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
|
| 2 |
+
|
| 3 |
+
__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
|
build/{torch29-cxx11-cu128-aarch64-linux/quantization_eetq → torch210-cxx11-cu126-aarch64-linux}/_ops.py
RENAMED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _quantization_eetq_cuda_07c987f
|
| 3 |
+
ops = torch.ops._quantization_eetq_cuda_07c987f
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_quantization_eetq_cuda_07c987f::{op_name}"
|
build/{torch29-cxx11-cu126-aarch64-linux/quantization_eetq/_quantization_eetq_ee9ac42.abi3.so → torch210-cxx11-cu126-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92e9fc1d3da860e4f6c312e870e28de0cbb92d0aa948adc3be37ff0af38751d6
|
| 3 |
+
size 39010048
|
build/{torch29-cxx11-cu126-aarch64-linux/quantization_eetq → torch210-cxx11-cu126-aarch64-linux}/custom_ops.py
RENAMED
|
File without changes
|
build/torch210-cxx11-cu126-aarch64-linux/metadata.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": 1,
|
| 3 |
+
"license": "Apache-2.0",
|
| 4 |
+
"python-depends": [],
|
| 5 |
+
"backend": {
|
| 6 |
+
"type": "cuda",
|
| 7 |
+
"archs": [
|
| 8 |
+
"7.0",
|
| 9 |
+
"7.2",
|
| 10 |
+
"7.5",
|
| 11 |
+
"8.0",
|
| 12 |
+
"8.6",
|
| 13 |
+
"8.7",
|
| 14 |
+
"8.9",
|
| 15 |
+
"9.0+PTX"
|
| 16 |
+
]
|
| 17 |
+
}
|
| 18 |
+
}
|
build/torch210-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ctypes
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
import importlib
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from types import ModuleType
|
| 7 |
+
|
| 8 |
+
def _import_from_path(file_path: Path) -> ModuleType:
|
| 9 |
+
# We cannot use the module name as-is, after adding it to `sys.modules`,
|
| 10 |
+
# it would also be used for other imports. So, we make a module name that
|
| 11 |
+
# depends on the path for it to be unique using the hex-encoded hash of
|
| 12 |
+
# the path.
|
| 13 |
+
path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
|
| 14 |
+
module_name = path_hash
|
| 15 |
+
spec = importlib.util.spec_from_file_location(module_name, file_path)
|
| 16 |
+
if spec is None:
|
| 17 |
+
raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
|
| 18 |
+
module = importlib.util.module_from_spec(spec)
|
| 19 |
+
if module is None:
|
| 20 |
+
raise ImportError(f"Cannot load module {module_name} from spec")
|
| 21 |
+
sys.modules[module_name] = module
|
| 22 |
+
spec.loader.exec_module(module) # type: ignore
|
| 23 |
+
return module
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
|
build/torch210-cxx11-cu128-aarch64-linux/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
|
| 2 |
+
|
| 3 |
+
__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
|
build/{torch29-cxx11-cu126-aarch64-linux/quantization_eetq → torch210-cxx11-cu128-aarch64-linux}/_ops.py
RENAMED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import torch
|
| 2 |
-
from . import
|
| 3 |
-
ops = torch.ops.
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
-
return f"
|
|
|
|
| 1 |
import torch
|
| 2 |
+
from . import _quantization_eetq_cuda_07c987f
|
| 3 |
+
ops = torch.ops._quantization_eetq_cuda_07c987f
|
| 4 |
|
| 5 |
def add_op_namespace_prefix(op_name: str):
|
| 6 |
"""
|
| 7 |
Prefix op by namespace.
|
| 8 |
"""
|
| 9 |
+
return f"_quantization_eetq_cuda_07c987f::{op_name}"
|
build/{torch29-cxx11-cu128-aarch64-linux/quantization_eetq/_quantization_eetq_ee9ac42.abi3.so → torch210-cxx11-cu128-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7546e7acd438f6f7b438906fddce53265b7684f63fa6e134e2d85ca414586cab
|
| 3 |
+
size 45366048
|
build/{torch29-cxx11-cu128-aarch64-linux/quantization_eetq → torch210-cxx11-cu128-aarch64-linux}/custom_ops.py
RENAMED
|
File without changes
|
build/torch210-cxx11-cu128-aarch64-linux/metadata.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": 1,
|
| 3 |
+
"license": "Apache-2.0",
|
| 4 |
+
"python-depends": [],
|
| 5 |
+
"backend": {
|
| 6 |
+
"type": "cuda",
|
| 7 |
+
"archs": [
|
| 8 |
+
"10.0",
|
| 9 |
+
"10.1",
|
| 10 |
+
"12.0+PTX",
|
| 11 |
+
"7.0",
|
| 12 |
+
"7.2",
|
| 13 |
+
"7.5",
|
| 14 |
+
"8.0",
|
| 15 |
+
"8.6",
|
| 16 |
+
"8.7",
|
| 17 |
+
"8.9",
|
| 18 |
+
"9.0"
|
| 19 |
+
]
|
| 20 |
+
}
|
| 21 |
+
}
|
build/torch210-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ctypes
|
| 2 |
+
import sys
|
| 3 |
+
|
| 4 |
+
import importlib
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from types import ModuleType
|
| 7 |
+
|
| 8 |
+
def _import_from_path(file_path: Path) -> ModuleType:
|
| 9 |
+
# We cannot use the module name as-is, after adding it to `sys.modules`,
|
| 10 |
+
# it would also be used for other imports. So, we make a module name that
|
| 11 |
+
# depends on the path for it to be unique using the hex-encoded hash of
|
| 12 |
+
# the path.
|
| 13 |
+
path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
|
| 14 |
+
module_name = path_hash
|
| 15 |
+
spec = importlib.util.spec_from_file_location(module_name, file_path)
|
| 16 |
+
if spec is None:
|
| 17 |
+
raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
|
| 18 |
+
module = importlib.util.module_from_spec(spec)
|
| 19 |
+
if module is None:
|
| 20 |
+
raise ImportError(f"Cannot load module {module_name} from spec")
|
| 21 |
+
sys.modules[module_name] = module
|
| 22 |
+
spec.loader.exec_module(module) # type: ignore
|
| 23 |
+
return module
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
|
build/torch29-cxx11-cu126-aarch64-linux/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
|
| 2 |
+
|
| 3 |
+
__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
|
build/torch29-cxx11-cu126-aarch64-linux/_ops.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from . import _quantization_eetq_cuda_07c987f
|
| 3 |
+
ops = torch.ops._quantization_eetq_cuda_07c987f
|
| 4 |
+
|
| 5 |
+
def add_op_namespace_prefix(op_name: str):
|
| 6 |
+
"""
|
| 7 |
+
Prefix op by namespace.
|
| 8 |
+
"""
|
| 9 |
+
return f"_quantization_eetq_cuda_07c987f::{op_name}"
|
build/torch29-cxx11-cu126-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0dc7862071e70892ff753e2c3b90f40e9f67f698d585b5be9fca0dc1e8d92fb
|
| 3 |
+
size 39006704
|
build/torch29-cxx11-cu126-aarch64-linux/custom_ops.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
from ._ops import ops
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def w8_a16_gemm(
|
| 8 |
+
input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
|
| 9 |
+
) -> torch.Tensor:
|
| 10 |
+
return ops.w8_a16_gemm(input, weight, scale)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def w8_a16_gemm_(
|
| 14 |
+
input: torch.Tensor,
|
| 15 |
+
weight: torch.Tensor,
|
| 16 |
+
scale: torch.Tensor,
|
| 17 |
+
output: torch.Tensor,
|
| 18 |
+
m: int,
|
| 19 |
+
n: int,
|
| 20 |
+
k: int,
|
| 21 |
+
) -> torch.Tensor:
|
| 22 |
+
return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
|
| 26 |
+
return ops.preprocess_weights(origin_weight, is_int4)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def quant_weights(
|
| 30 |
+
origin_weight: torch.Tensor,
|
| 31 |
+
quant_type: torch.dtype,
|
| 32 |
+
return_unprocessed_quantized_tensor: bool,
|
| 33 |
+
) -> List[torch.Tensor]:
|
| 34 |
+
return ops.quant_weights(
|
| 35 |
+
origin_weight, quant_type, return_unprocessed_quantized_tensor
|
| 36 |
+
)
|
build/torch29-cxx11-cu126-aarch64-linux/metadata.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": 1,
|
| 3 |
+
"license": "Apache-2.0",
|
| 4 |
+
"python-depends": [],
|
| 5 |
+
"backend": {
|
| 6 |
+
"type": "cuda",
|
| 7 |
+
"archs": [
|
| 8 |
+
"7.0",
|
| 9 |
+
"7.2",
|
| 10 |
+
"7.5",
|
| 11 |
+
"8.0",
|
| 12 |
+
"8.6",
|
| 13 |
+
"8.7",
|
| 14 |
+
"8.9",
|
| 15 |
+
"9.0+PTX"
|
| 16 |
+
]
|
| 17 |
+
}
|
| 18 |
+
}
|
build/torch29-cxx11-cu126-aarch64-linux/quantization_eetq/__init__.py
CHANGED
|
@@ -1,3 +1,26 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ctypes
|
| 2 |
+
import sys
|
| 3 |
|
| 4 |
+
import importlib
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from types import ModuleType
|
| 7 |
+
|
| 8 |
+
def _import_from_path(file_path: Path) -> ModuleType:
|
| 9 |
+
# We cannot use the module name as-is, after adding it to `sys.modules`,
|
| 10 |
+
# it would also be used for other imports. So, we make a module name that
|
| 11 |
+
# depends on the path for it to be unique using the hex-encoded hash of
|
| 12 |
+
# the path.
|
| 13 |
+
path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
|
| 14 |
+
module_name = path_hash
|
| 15 |
+
spec = importlib.util.spec_from_file_location(module_name, file_path)
|
| 16 |
+
if spec is None:
|
| 17 |
+
raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
|
| 18 |
+
module = importlib.util.module_from_spec(spec)
|
| 19 |
+
if module is None:
|
| 20 |
+
raise ImportError(f"Cannot load module {module_name} from spec")
|
| 21 |
+
sys.modules[module_name] = module
|
| 22 |
+
spec.loader.exec_module(module) # type: ignore
|
| 23 |
+
return module
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
|
build/torch29-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc
DELETED
|
Binary file (335 Bytes)
|
|
|
build/torch29-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc
DELETED
|
Binary file (548 Bytes)
|
|
|
build/torch29-cxx11-cu126-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc
DELETED
|
Binary file (1.84 kB)
|
|
|
build/torch29-cxx11-cu128-aarch64-linux/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .custom_ops import w8_a16_gemm, w8_a16_gemm_, preprocess_weights, quant_weights
|
| 2 |
+
|
| 3 |
+
__all__ = ["w8_a16_gemm", "w8_a16_gemm_", "preprocess_weights", "quant_weights"]
|
build/torch29-cxx11-cu128-aarch64-linux/_ops.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
from . import _quantization_eetq_cuda_07c987f
|
| 3 |
+
ops = torch.ops._quantization_eetq_cuda_07c987f
|
| 4 |
+
|
| 5 |
+
def add_op_namespace_prefix(op_name: str):
|
| 6 |
+
"""
|
| 7 |
+
Prefix op by namespace.
|
| 8 |
+
"""
|
| 9 |
+
return f"_quantization_eetq_cuda_07c987f::{op_name}"
|
build/torch29-cxx11-cu128-aarch64-linux/_quantization_eetq_cuda_07c987f.abi3.so
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fe7e9d12743a281594a2bcc46d8592a8f1bc6d74c0e51bb1c85d04a67e5001c
|
| 3 |
+
size 45296736
|
build/torch29-cxx11-cu128-aarch64-linux/custom_ops.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
import torch
|
| 3 |
+
|
| 4 |
+
from ._ops import ops
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def w8_a16_gemm(
|
| 8 |
+
input: torch.Tensor, weight: torch.Tensor, scale: torch.Tensor
|
| 9 |
+
) -> torch.Tensor:
|
| 10 |
+
return ops.w8_a16_gemm(input, weight, scale)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def w8_a16_gemm_(
|
| 14 |
+
input: torch.Tensor,
|
| 15 |
+
weight: torch.Tensor,
|
| 16 |
+
scale: torch.Tensor,
|
| 17 |
+
output: torch.Tensor,
|
| 18 |
+
m: int,
|
| 19 |
+
n: int,
|
| 20 |
+
k: int,
|
| 21 |
+
) -> torch.Tensor:
|
| 22 |
+
return ops.w8_a16_gemm_(input, weight, scale, output, m, n, k)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def preprocess_weights(origin_weight: torch.Tensor, is_int4: bool) -> torch.Tensor:
|
| 26 |
+
return ops.preprocess_weights(origin_weight, is_int4)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def quant_weights(
|
| 30 |
+
origin_weight: torch.Tensor,
|
| 31 |
+
quant_type: torch.dtype,
|
| 32 |
+
return_unprocessed_quantized_tensor: bool,
|
| 33 |
+
) -> List[torch.Tensor]:
|
| 34 |
+
return ops.quant_weights(
|
| 35 |
+
origin_weight, quant_type, return_unprocessed_quantized_tensor
|
| 36 |
+
)
|
build/torch29-cxx11-cu128-aarch64-linux/metadata.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": 1,
|
| 3 |
+
"license": "Apache-2.0",
|
| 4 |
+
"python-depends": [],
|
| 5 |
+
"backend": {
|
| 6 |
+
"type": "cuda",
|
| 7 |
+
"archs": [
|
| 8 |
+
"10.0",
|
| 9 |
+
"10.1",
|
| 10 |
+
"12.0+PTX",
|
| 11 |
+
"7.0",
|
| 12 |
+
"7.2",
|
| 13 |
+
"7.5",
|
| 14 |
+
"8.0",
|
| 15 |
+
"8.6",
|
| 16 |
+
"8.7",
|
| 17 |
+
"8.9",
|
| 18 |
+
"9.0"
|
| 19 |
+
]
|
| 20 |
+
}
|
| 21 |
+
}
|
build/torch29-cxx11-cu128-aarch64-linux/quantization_eetq/__init__.py
CHANGED
|
@@ -1,3 +1,26 @@
|
|
| 1 |
-
|
|
|
|
| 2 |
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import ctypes
|
| 2 |
+
import sys
|
| 3 |
|
| 4 |
+
import importlib
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from types import ModuleType
|
| 7 |
+
|
| 8 |
+
def _import_from_path(file_path: Path) -> ModuleType:
|
| 9 |
+
# We cannot use the module name as-is, after adding it to `sys.modules`,
|
| 10 |
+
# it would also be used for other imports. So, we make a module name that
|
| 11 |
+
# depends on the path for it to be unique using the hex-encoded hash of
|
| 12 |
+
# the path.
|
| 13 |
+
path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value)
|
| 14 |
+
module_name = path_hash
|
| 15 |
+
spec = importlib.util.spec_from_file_location(module_name, file_path)
|
| 16 |
+
if spec is None:
|
| 17 |
+
raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
|
| 18 |
+
module = importlib.util.module_from_spec(spec)
|
| 19 |
+
if module is None:
|
| 20 |
+
raise ImportError(f"Cannot load module {module_name} from spec")
|
| 21 |
+
sys.modules[module_name] = module
|
| 22 |
+
spec.loader.exec_module(module) # type: ignore
|
| 23 |
+
return module
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py")))
|
build/torch29-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/__init__.cpython-313.pyc
DELETED
|
Binary file (335 Bytes)
|
|
|
build/torch29-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/_ops.cpython-313.pyc
DELETED
|
Binary file (548 Bytes)
|
|
|
build/torch29-cxx11-cu128-aarch64-linux/quantization_eetq/__pycache__/custom_ops.cpython-313.pyc
DELETED
|
Binary file (1.84 kB)
|
|
|