diff --git a/build/torch210-cxx11-cu126-x86_64-linux/_ops.py b/build/torch210-cxx11-cu126-x86_64-linux/_ops.py index bcd6f547a3212e4e5fd7b7c58ea1f4439bada1fe..08f25325eaf8e3891c61289578641347f860e4b3 100644 --- a/build/torch210-cxx11-cu126-x86_64-linux/_ops.py +++ b/build/torch210-cxx11-cu126-x86_64-linux/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _paged_attention_cuda_86f75d9 -ops = torch.ops._paged_attention_cuda_86f75d9 +from . import _paged_attention_cuda_2567cd6 +ops = torch.ops._paged_attention_cuda_2567cd6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_paged_attention_cuda_86f75d9::{op_name}" + return f"_paged_attention_cuda_2567cd6::{op_name}" diff --git a/build/torch210-cxx11-cu126-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so b/build/torch210-cxx11-cu126-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..33d074ddbee49c2e3f7e19855c98461fef7371ef --- /dev/null +++ b/build/torch210-cxx11-cu126-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78a5a91ffa5a61aa74fb403262b0558f223898b63777968d8efbf6f31376e3cf +size 140162704 diff --git a/build/torch210-cxx11-cu126-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so b/build/torch210-cxx11-cu126-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so deleted file mode 100644 index 5476242de69b519040345274470a4d540360b43d..0000000000000000000000000000000000000000 --- a/build/torch210-cxx11-cu126-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f3b914c6cb87a56bf56630bb51a53ec05762bbbcca7b61bd8cdb1b07e1b4978c -size 140162704 diff --git a/build/torch210-cxx11-cu126-x86_64-linux/metadata.json b/build/torch210-cxx11-cu126-x86_64-linux/metadata.json index 306d65f7181e2051428dbe2e34d03f5856171a2e..d94667c7beed61b7bc4da10ae310f050d46cfdc4 100644 --- 
a/build/torch210-cxx11-cu126-x86_64-linux/metadata.json +++ b/build/torch210-cxx11-cu126-x86_64-linux/metadata.json @@ -1,6 +1,6 @@ { "name": "paged-attention", - "id": "_paged_attention_cuda_86f75d9", + "id": "_paged_attention_cuda_2567cd6", "version": 1, "license": "Apache-2.0", "python-depends": [], diff --git a/build/torch210-cxx11-cu128-x86_64-linux/_ops.py b/build/torch210-cxx11-cu128-x86_64-linux/_ops.py index bcd6f547a3212e4e5fd7b7c58ea1f4439bada1fe..08f25325eaf8e3891c61289578641347f860e4b3 100644 --- a/build/torch210-cxx11-cu128-x86_64-linux/_ops.py +++ b/build/torch210-cxx11-cu128-x86_64-linux/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _paged_attention_cuda_86f75d9 -ops = torch.ops._paged_attention_cuda_86f75d9 +from . import _paged_attention_cuda_2567cd6 +ops = torch.ops._paged_attention_cuda_2567cd6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_paged_attention_cuda_86f75d9::{op_name}" + return f"_paged_attention_cuda_2567cd6::{op_name}" diff --git a/build/torch210-cxx11-cu128-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so b/build/torch210-cxx11-cu128-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..06a322b9b6eb5230582482473dc7f85cffe451f4 --- /dev/null +++ b/build/torch210-cxx11-cu128-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:567e0ca0a3bd4f76e932894435215b92764fdb4e481463e81801c8a259db87dd +size 167726096 diff --git a/build/torch210-cxx11-cu128-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so b/build/torch210-cxx11-cu128-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so deleted file mode 100644 index 497963aab71396203aaf7ed36ba0c630ac589b31..0000000000000000000000000000000000000000 --- a/build/torch210-cxx11-cu128-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid 
sha256:f827940d8899079daaa40f441d0b03198ca0196b28a66b888f2ea50fbf078c0a -size 167726096 diff --git a/build/torch210-cxx11-cu128-x86_64-linux/metadata.json b/build/torch210-cxx11-cu128-x86_64-linux/metadata.json index 466651911dd8aaf31f45a3b89df181d9e3c255f2..d38efa93928268034bd7812021a019fd274c7888 100644 --- a/build/torch210-cxx11-cu128-x86_64-linux/metadata.json +++ b/build/torch210-cxx11-cu128-x86_64-linux/metadata.json @@ -1,6 +1,6 @@ { "name": "paged-attention", - "id": "_paged_attention_cuda_86f75d9", + "id": "_paged_attention_cuda_2567cd6", "version": 1, "license": "Apache-2.0", "python-depends": [], diff --git a/build/torch210-cxx11-cu130-x86_64-linux/_ops.py b/build/torch210-cxx11-cu130-x86_64-linux/_ops.py index bcd6f547a3212e4e5fd7b7c58ea1f4439bada1fe..08f25325eaf8e3891c61289578641347f860e4b3 100644 --- a/build/torch210-cxx11-cu130-x86_64-linux/_ops.py +++ b/build/torch210-cxx11-cu130-x86_64-linux/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _paged_attention_cuda_86f75d9 -ops = torch.ops._paged_attention_cuda_86f75d9 +from . import _paged_attention_cuda_2567cd6 +ops = torch.ops._paged_attention_cuda_2567cd6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_paged_attention_cuda_86f75d9::{op_name}" + return f"_paged_attention_cuda_2567cd6::{op_name}" diff --git a/build/torch210-cxx11-cu130-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so b/build/torch210-cxx11-cu130-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..50444be5cf3d518a71142c2ef06a0b86a49b6726 --- /dev/null +++ b/build/torch210-cxx11-cu130-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b62d68a6276199ed7e179425c7bf434f3198b1cfa73d947cd1466f562edd99 +size 86560024 diff --git a/build/torch210-cxx11-cu130-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so b/build/torch210-cxx11-cu130-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so deleted file mode 100644 index 5158726b1d57c828053d5b194b78e6be2511c026..0000000000000000000000000000000000000000 --- a/build/torch210-cxx11-cu130-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:267cdf92780bf9c31872fb9e91a87f5ad16dc7978e44233cd83cf22c76d2c313 -size 86560024 diff --git a/build/torch210-cxx11-cu130-x86_64-linux/metadata.json b/build/torch210-cxx11-cu130-x86_64-linux/metadata.json index 0005d4baa129cae8e35c2137ef034e4afa5be223..61f5a0521120b48141a0edc597691f9ba67885c7 100644 --- a/build/torch210-cxx11-cu130-x86_64-linux/metadata.json +++ b/build/torch210-cxx11-cu130-x86_64-linux/metadata.json @@ -1,6 +1,6 @@ { "name": "paged-attention", - "id": "_paged_attention_cuda_86f75d9", + "id": "_paged_attention_cuda_2567cd6", "version": 1, "license": "Apache-2.0", "python-depends": [], diff --git a/build/torch210-cxx11-rocm70-x86_64-linux/_ops.py b/build/torch210-cxx11-rocm70-x86_64-linux/_ops.py index 4668ea2561fb20890ce1b6f04b1f68336edfa519..da470d69423ba6440c271ea421c44bf1d3726853 100644 --- a/build/torch210-cxx11-rocm70-x86_64-linux/_ops.py +++ 
b/build/torch210-cxx11-rocm70-x86_64-linux/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _paged_attention_rocm_86f75d9 -ops = torch.ops._paged_attention_rocm_86f75d9 +from . import _paged_attention_rocm_2567cd6 +ops = torch.ops._paged_attention_rocm_2567cd6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_paged_attention_rocm_86f75d9::{op_name}" + return f"_paged_attention_rocm_2567cd6::{op_name}" diff --git a/build/torch210-cxx11-rocm70-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so b/build/torch210-cxx11-rocm70-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..3ca90bd9ca76527991e987a35d0bb655ae9f2f6a --- /dev/null +++ b/build/torch210-cxx11-rocm70-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84ffe2ab8eaf52cfb8b656f0e4a7405f818d2d3837986862ba75c6efcf89fe7b +size 84940576 diff --git a/build/torch210-cxx11-rocm70-x86_64-linux/_paged_attention_rocm_86f75d9.abi3.so b/build/torch210-cxx11-rocm70-x86_64-linux/_paged_attention_rocm_86f75d9.abi3.so deleted file mode 100644 index 57affc7e980aab1f2c8181ab99808608750bcdaf..0000000000000000000000000000000000000000 --- a/build/torch210-cxx11-rocm70-x86_64-linux/_paged_attention_rocm_86f75d9.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c9198dc28f1a1f1c9b82c462bb2bc60aeb3e09b43295ed36aefc66d7e7af74f2 -size 58992416 diff --git a/build/torch210-cxx11-rocm70-x86_64-linux/metadata.json b/build/torch210-cxx11-rocm70-x86_64-linux/metadata.json index 4f8745c111cd3f4fa8b9440f9918c8af96a30840..5a8c4d218743dcdd3752770553dfaf6cfc59ae12 100644 --- a/build/torch210-cxx11-rocm70-x86_64-linux/metadata.json +++ b/build/torch210-cxx11-rocm70-x86_64-linux/metadata.json @@ -1,6 +1,6 @@ { "name": "paged-attention", - "id": "_paged_attention_rocm_86f75d9", + "id": "_paged_attention_rocm_2567cd6", "version": 
1, "license": "Apache-2.0", "python-depends": [], @@ -10,10 +10,13 @@ "gfx1030", "gfx1100", "gfx1101", + "gfx1200", + "gfx1201", "gfx906", "gfx908", "gfx90a", - "gfx942" + "gfx942", + "gfx950" ] } } diff --git a/build/torch210-cxx11-rocm71-x86_64-linux/_ops.py b/build/torch210-cxx11-rocm71-x86_64-linux/_ops.py index 4668ea2561fb20890ce1b6f04b1f68336edfa519..da470d69423ba6440c271ea421c44bf1d3726853 100644 --- a/build/torch210-cxx11-rocm71-x86_64-linux/_ops.py +++ b/build/torch210-cxx11-rocm71-x86_64-linux/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _paged_attention_rocm_86f75d9 -ops = torch.ops._paged_attention_rocm_86f75d9 +from . import _paged_attention_rocm_2567cd6 +ops = torch.ops._paged_attention_rocm_2567cd6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_paged_attention_rocm_86f75d9::{op_name}" + return f"_paged_attention_rocm_2567cd6::{op_name}" diff --git a/build/torch210-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so b/build/torch210-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..ba880f7882e120ea8da1cb48c9c194fc2531fc2c --- /dev/null +++ b/build/torch210-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86fc9985afc44618710a543609c0694cf30bf2812c8aa0d0d5fb3048315d4f78 +size 84911808 diff --git a/build/torch210-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_86f75d9.abi3.so b/build/torch210-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_86f75d9.abi3.so deleted file mode 100644 index d5535dc3ad8a0614bb1f21d50cf3e2121f2d03a6..0000000000000000000000000000000000000000 --- a/build/torch210-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_86f75d9.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7252bc2aec0ffa6e9fd79fa24c7f2ec4556538e84ac099467cdd91d97ae7045e -size 58971840 diff --git 
a/build/torch210-cxx11-rocm71-x86_64-linux/metadata.json b/build/torch210-cxx11-rocm71-x86_64-linux/metadata.json index 4f8745c111cd3f4fa8b9440f9918c8af96a30840..5a8c4d218743dcdd3752770553dfaf6cfc59ae12 100644 --- a/build/torch210-cxx11-rocm71-x86_64-linux/metadata.json +++ b/build/torch210-cxx11-rocm71-x86_64-linux/metadata.json @@ -1,6 +1,6 @@ { "name": "paged-attention", - "id": "_paged_attention_rocm_86f75d9", + "id": "_paged_attention_rocm_2567cd6", "version": 1, "license": "Apache-2.0", "python-depends": [], @@ -10,10 +10,13 @@ "gfx1030", "gfx1100", "gfx1101", + "gfx1200", + "gfx1201", "gfx906", "gfx908", "gfx90a", - "gfx942" + "gfx942", + "gfx950" ] } } diff --git a/build/torch211-cxx11-cu126-x86_64-linux/_ops.py b/build/torch211-cxx11-cu126-x86_64-linux/_ops.py index bcd6f547a3212e4e5fd7b7c58ea1f4439bada1fe..08f25325eaf8e3891c61289578641347f860e4b3 100644 --- a/build/torch211-cxx11-cu126-x86_64-linux/_ops.py +++ b/build/torch211-cxx11-cu126-x86_64-linux/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _paged_attention_cuda_86f75d9 -ops = torch.ops._paged_attention_cuda_86f75d9 +from . import _paged_attention_cuda_2567cd6 +ops = torch.ops._paged_attention_cuda_2567cd6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_paged_attention_cuda_86f75d9::{op_name}" + return f"_paged_attention_cuda_2567cd6::{op_name}" diff --git a/build/torch211-cxx11-cu126-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so b/build/torch211-cxx11-cu126-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..db8b4414cd9af62bdca722aeeac9dde74090b051 --- /dev/null +++ b/build/torch211-cxx11-cu126-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f245ffbd19fd92706e7332f1bdc35da933080407fc085e8b0fdd47007e1ee29 +size 140147224 diff --git a/build/torch211-cxx11-cu126-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so b/build/torch211-cxx11-cu126-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so deleted file mode 100644 index f79d7b6ad8ae6f0ca6840beda659a92c07fa74b5..0000000000000000000000000000000000000000 --- a/build/torch211-cxx11-cu126-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6f93258416e403f339435e72ae67787fb880962f5759dfaec3ab9202ed932b37 -size 140147224 diff --git a/build/torch211-cxx11-cu126-x86_64-linux/metadata.json b/build/torch211-cxx11-cu126-x86_64-linux/metadata.json index 306d65f7181e2051428dbe2e34d03f5856171a2e..d94667c7beed61b7bc4da10ae310f050d46cfdc4 100644 --- a/build/torch211-cxx11-cu126-x86_64-linux/metadata.json +++ b/build/torch211-cxx11-cu126-x86_64-linux/metadata.json @@ -1,6 +1,6 @@ { "name": "paged-attention", - "id": "_paged_attention_cuda_86f75d9", + "id": "_paged_attention_cuda_2567cd6", "version": 1, "license": "Apache-2.0", "python-depends": [], diff --git a/build/torch211-cxx11-cu128-x86_64-linux/_ops.py b/build/torch211-cxx11-cu128-x86_64-linux/_ops.py index bcd6f547a3212e4e5fd7b7c58ea1f4439bada1fe..08f25325eaf8e3891c61289578641347f860e4b3 100644 --- a/build/torch211-cxx11-cu128-x86_64-linux/_ops.py +++ 
b/build/torch211-cxx11-cu128-x86_64-linux/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _paged_attention_cuda_86f75d9 -ops = torch.ops._paged_attention_cuda_86f75d9 +from . import _paged_attention_cuda_2567cd6 +ops = torch.ops._paged_attention_cuda_2567cd6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_paged_attention_cuda_86f75d9::{op_name}" + return f"_paged_attention_cuda_2567cd6::{op_name}" diff --git a/build/torch211-cxx11-cu128-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so b/build/torch211-cxx11-cu128-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..bd07a6e63b988965cf6c7a7be9fdb53223dde788 --- /dev/null +++ b/build/torch211-cxx11-cu128-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45e8db1a82efeed8114df045332d9cf9cc5897896b46997eb834b926b8457529 +size 167710616 diff --git a/build/torch211-cxx11-cu128-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so b/build/torch211-cxx11-cu128-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so deleted file mode 100644 index 59122fcadb4562f8837ab6737867129a90611770..0000000000000000000000000000000000000000 --- a/build/torch211-cxx11-cu128-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5978826c7beb813daca2a0a9d596cd254c24c181b2c3922ed3b17771357bc20e -size 167710616 diff --git a/build/torch211-cxx11-cu128-x86_64-linux/metadata.json b/build/torch211-cxx11-cu128-x86_64-linux/metadata.json index 466651911dd8aaf31f45a3b89df181d9e3c255f2..d38efa93928268034bd7812021a019fd274c7888 100644 --- a/build/torch211-cxx11-cu128-x86_64-linux/metadata.json +++ b/build/torch211-cxx11-cu128-x86_64-linux/metadata.json @@ -1,6 +1,6 @@ { "name": "paged-attention", - "id": "_paged_attention_cuda_86f75d9", + "id": "_paged_attention_cuda_2567cd6", "version": 1, 
"license": "Apache-2.0", "python-depends": [], diff --git a/build/torch211-cxx11-cu130-x86_64-linux/_ops.py b/build/torch211-cxx11-cu130-x86_64-linux/_ops.py index bcd6f547a3212e4e5fd7b7c58ea1f4439bada1fe..08f25325eaf8e3891c61289578641347f860e4b3 100644 --- a/build/torch211-cxx11-cu130-x86_64-linux/_ops.py +++ b/build/torch211-cxx11-cu130-x86_64-linux/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _paged_attention_cuda_86f75d9 -ops = torch.ops._paged_attention_cuda_86f75d9 +from . import _paged_attention_cuda_2567cd6 +ops = torch.ops._paged_attention_cuda_2567cd6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_paged_attention_cuda_86f75d9::{op_name}" + return f"_paged_attention_cuda_2567cd6::{op_name}" diff --git a/build/torch211-cxx11-cu130-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so b/build/torch211-cxx11-cu130-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..661db440362adc0792556d101de789f90a62156b --- /dev/null +++ b/build/torch211-cxx11-cu130-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e97cfe7c0fb322c1ddda6253cfa6462f7320b82db874a93aee161857dfa3ee28 +size 86548640 diff --git a/build/torch211-cxx11-cu130-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so b/build/torch211-cxx11-cu130-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so deleted file mode 100644 index 6ff806e47cfcd296a7354911462f9fdd28eb3531..0000000000000000000000000000000000000000 --- a/build/torch211-cxx11-cu130-x86_64-linux/_paged_attention_cuda_86f75d9.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b897c1185c95f89f2dcc3a6fc13ea9a486d5939fc03574930689b4b89037efeb -size 86548640 diff --git a/build/torch211-cxx11-cu130-x86_64-linux/metadata.json b/build/torch211-cxx11-cu130-x86_64-linux/metadata.json index 
0005d4baa129cae8e35c2137ef034e4afa5be223..61f5a0521120b48141a0edc597691f9ba67885c7 100644 --- a/build/torch211-cxx11-cu130-x86_64-linux/metadata.json +++ b/build/torch211-cxx11-cu130-x86_64-linux/metadata.json @@ -1,6 +1,6 @@ { "name": "paged-attention", - "id": "_paged_attention_cuda_86f75d9", + "id": "_paged_attention_cuda_2567cd6", "version": 1, "license": "Apache-2.0", "python-depends": [], diff --git a/build/torch211-cxx11-rocm71-x86_64-linux/_ops.py b/build/torch211-cxx11-rocm71-x86_64-linux/_ops.py index 4668ea2561fb20890ce1b6f04b1f68336edfa519..da470d69423ba6440c271ea421c44bf1d3726853 100644 --- a/build/torch211-cxx11-rocm71-x86_64-linux/_ops.py +++ b/build/torch211-cxx11-rocm71-x86_64-linux/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _paged_attention_rocm_86f75d9 -ops = torch.ops._paged_attention_rocm_86f75d9 +from . import _paged_attention_rocm_2567cd6 +ops = torch.ops._paged_attention_rocm_2567cd6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. """ - return f"_paged_attention_rocm_86f75d9::{op_name}" + return f"_paged_attention_rocm_2567cd6::{op_name}" diff --git a/build/torch211-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so b/build/torch211-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..fc725d53d16752d44fa0b8f3fd4106a63086eec2 --- /dev/null +++ b/build/torch211-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aee1fb79c594e620c68cab777fc523daac3791bb95533bb1ad1cccf7bfec6ede +size 84904144 diff --git a/build/torch211-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_86f75d9.abi3.so b/build/torch211-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_86f75d9.abi3.so deleted file mode 100644 index c4b9879c86e4f53b374b7ea1bb5cc045c13bdf0a..0000000000000000000000000000000000000000 --- 
a/build/torch211-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_86f75d9.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3252177fcd8f47c0252a3f63d89f800612ecbd33766e10dabae075a63727e486 -size 58964176 diff --git a/build/torch211-cxx11-rocm71-x86_64-linux/metadata.json b/build/torch211-cxx11-rocm71-x86_64-linux/metadata.json index 4f8745c111cd3f4fa8b9440f9918c8af96a30840..5a8c4d218743dcdd3752770553dfaf6cfc59ae12 100644 --- a/build/torch211-cxx11-rocm71-x86_64-linux/metadata.json +++ b/build/torch211-cxx11-rocm71-x86_64-linux/metadata.json @@ -1,6 +1,6 @@ { "name": "paged-attention", - "id": "_paged_attention_rocm_86f75d9", + "id": "_paged_attention_rocm_2567cd6", "version": 1, "license": "Apache-2.0", "python-depends": [], @@ -10,10 +10,13 @@ "gfx1030", "gfx1100", "gfx1101", + "gfx1200", + "gfx1201", "gfx906", "gfx908", "gfx90a", - "gfx942" + "gfx942", + "gfx950" ] } } diff --git a/build/torch211-cxx11-rocm72-x86_64-linux/_ops.py b/build/torch211-cxx11-rocm72-x86_64-linux/_ops.py index 4668ea2561fb20890ce1b6f04b1f68336edfa519..da470d69423ba6440c271ea421c44bf1d3726853 100644 --- a/build/torch211-cxx11-rocm72-x86_64-linux/_ops.py +++ b/build/torch211-cxx11-rocm72-x86_64-linux/_ops.py @@ -1,9 +1,9 @@ import torch -from . import _paged_attention_rocm_86f75d9 -ops = torch.ops._paged_attention_rocm_86f75d9 +from . import _paged_attention_rocm_2567cd6 +ops = torch.ops._paged_attention_rocm_2567cd6 def add_op_namespace_prefix(op_name: str): """ Prefix op by namespace. 
""" - return f"_paged_attention_rocm_86f75d9::{op_name}" + return f"_paged_attention_rocm_2567cd6::{op_name}" diff --git a/build/torch211-cxx11-rocm72-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so b/build/torch211-cxx11-rocm72-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..ff46c35fc3ea4894712b7ff3bac5b7b14c13a6bd --- /dev/null +++ b/build/torch211-cxx11-rocm72-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b3627300ef4751613651def9c14affb20172f498a77485e1e87482b1a69e730 +size 81658920 diff --git a/build/torch211-cxx11-rocm72-x86_64-linux/_paged_attention_rocm_86f75d9.abi3.so b/build/torch211-cxx11-rocm72-x86_64-linux/_paged_attention_rocm_86f75d9.abi3.so deleted file mode 100644 index 359ef146745b04821b824fa35e587dc2bc5e2fe5..0000000000000000000000000000000000000000 --- a/build/torch211-cxx11-rocm72-x86_64-linux/_paged_attention_rocm_86f75d9.abi3.so +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:476f879b9277e59e1284a2f1eb5fba06b6e49d660d9ddddcfb03fbe9a59ee00f -size 56640560 diff --git a/build/torch211-cxx11-rocm72-x86_64-linux/metadata.json b/build/torch211-cxx11-rocm72-x86_64-linux/metadata.json index 4f8745c111cd3f4fa8b9440f9918c8af96a30840..5a8c4d218743dcdd3752770553dfaf6cfc59ae12 100644 --- a/build/torch211-cxx11-rocm72-x86_64-linux/metadata.json +++ b/build/torch211-cxx11-rocm72-x86_64-linux/metadata.json @@ -1,6 +1,6 @@ { "name": "paged-attention", - "id": "_paged_attention_rocm_86f75d9", + "id": "_paged_attention_rocm_2567cd6", "version": 1, "license": "Apache-2.0", "python-depends": [], @@ -10,10 +10,13 @@ "gfx1030", "gfx1100", "gfx1101", + "gfx1200", + "gfx1201", "gfx906", "gfx908", "gfx90a", - "gfx942" + "gfx942", + "gfx950" ] } } diff --git a/build/torch212-cxx11-cu126-x86_64-linux/__init__.py b/build/torch212-cxx11-cu126-x86_64-linux/__init__.py new file mode 
100644 index 0000000000000000000000000000000000000000..9de56043369487facc1f163df6bd319c9806e5ca --- /dev/null +++ b/build/torch212-cxx11-cu126-x86_64-linux/__init__.py @@ -0,0 +1,21 @@ +from ._custom_ops import ( + convert_fp8, + copy_blocks, + paged_attention_v1, + paged_attention_v2, + reshape_and_cache, + reshape_and_cache_flash, + swap_blocks, +) +from ._ops import ops + +__all__ = [ + "convert_fp8", + "copy_blocks", + "ops", + "paged_attention_v1", + "paged_attention_v2", + "reshape_and_cache", + "reshape_and_cache_flash", + "swap_blocks", +] diff --git a/build/torch212-cxx11-cu126-x86_64-linux/_custom_ops.py b/build/torch212-cxx11-cu126-x86_64-linux/_custom_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..a0c0b8db085468dee5100c98d14106a9ee917bf2 --- /dev/null +++ b/build/torch212-cxx11-cu126-x86_64-linux/_custom_ops.py @@ -0,0 +1,173 @@ +from typing import List, Optional + +import torch + +from ._ops import ops + + +# page attention ops +def paged_attention_v1( + out: torch.Tensor, + query: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + num_kv_heads: int, + scale: float, + block_tables: torch.Tensor, + seq_lens: torch.Tensor, + block_size: int, + max_seq_len: int, + alibi_slopes: Optional[torch.Tensor], + kv_cache_dtype: str, + k_scale: float, + v_scale: float, + tp_rank: int = 0, + blocksparse_local_blocks: int = 0, + blocksparse_vert_stride: int = 0, + blocksparse_block_size: int = 64, + blocksparse_head_sliding_step: int = 0, +) -> None: + ops.paged_attention_v1( + out, + query, + key_cache, + value_cache, + num_kv_heads, + scale, + block_tables, + seq_lens, + block_size, + max_seq_len, + alibi_slopes, + kv_cache_dtype, + k_scale, + v_scale, + tp_rank, + blocksparse_local_blocks, + blocksparse_vert_stride, + blocksparse_block_size, + blocksparse_head_sliding_step, + ) + + +def paged_attention_v2( + out: torch.Tensor, + exp_sum: torch.Tensor, + max_logits: torch.Tensor, + tmp_out: torch.Tensor, + query: 
torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + num_kv_heads: int, + scale: float, + block_tables: torch.Tensor, + seq_lens: torch.Tensor, + block_size: int, + max_seq_len: int, + alibi_slopes: Optional[torch.Tensor], + kv_cache_dtype: str, + k_scale: float, + v_scale: float, + tp_rank: int = 0, + blocksparse_local_blocks: int = 0, + blocksparse_vert_stride: int = 0, + blocksparse_block_size: int = 64, + blocksparse_head_sliding_step: int = 0, +) -> None: + ops.paged_attention_v2( + out, + exp_sum, + max_logits, + tmp_out, + query, + key_cache, + value_cache, + num_kv_heads, + scale, + block_tables, + seq_lens, + block_size, + max_seq_len, + alibi_slopes, + kv_cache_dtype, + k_scale, + v_scale, + tp_rank, + blocksparse_local_blocks, + blocksparse_vert_stride, + blocksparse_block_size, + blocksparse_head_sliding_step, + ) + + +def reshape_and_cache( + key: torch.Tensor, + value: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + slot_mapping: torch.Tensor, + kv_cache_dtype: str, + k_scale: float, + v_scale: float, +) -> None: + ops.reshape_and_cache( + key, + value, + key_cache, + value_cache, + slot_mapping, + kv_cache_dtype, + k_scale, + v_scale, + ) + + +def reshape_and_cache_flash( + key: torch.Tensor, + value: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + slot_mapping: torch.Tensor, + kv_cache_dtype: str, + k_scale: torch.Tensor, + v_scale: torch.Tensor, +) -> None: + ops.reshape_and_cache_flash( + key, + value, + key_cache, + value_cache, + slot_mapping, + kv_cache_dtype, + k_scale, + v_scale, + ) + + +def copy_blocks( + key_caches: List[torch.Tensor], + value_caches: List[torch.Tensor], + block_mapping: torch.Tensor, +) -> None: + ops.copy_blocks(key_caches, value_caches, block_mapping) + + +def swap_blocks( + src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor +) -> None: + ops.swap_blocks(src, dst, block_mapping) + + +def convert_fp8( + output: torch.Tensor, input: 
torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8" +) -> None: + ops.convert_fp8(output, input, scale, kv_dtype) + + +__all__ = [ + "convert_fp8", + "paged_attention_v1", + "paged_attention_v2", + "reshape_and_cache", + "copy_blocks", +] diff --git a/build/torch212-cxx11-cu126-x86_64-linux/_ops.py b/build/torch212-cxx11-cu126-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..08f25325eaf8e3891c61289578641347f860e4b3 --- /dev/null +++ b/build/torch212-cxx11-cu126-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _paged_attention_cuda_2567cd6 +ops = torch.ops._paged_attention_cuda_2567cd6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_paged_attention_cuda_2567cd6::{op_name}" diff --git a/build/torch212-cxx11-cu126-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so b/build/torch212-cxx11-cu126-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..df80c84169ccf3b7506c6ac07b5825a2c133d24e --- /dev/null +++ b/build/torch212-cxx11-cu126-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d19c56995b23b12b3d959c33eccdba1d519801a748c5c8bcae6bf9d4626ba477 +size 140152776 diff --git a/build/torch212-cxx11-cu126-x86_64-linux/metadata.json b/build/torch212-cxx11-cu126-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..d94667c7beed61b7bc4da10ae310f050d46cfdc4 --- /dev/null +++ b/build/torch212-cxx11-cu126-x86_64-linux/metadata.json @@ -0,0 +1,20 @@ +{ + "name": "paged-attention", + "id": "_paged_attention_cuda_2567cd6", + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "7.0", + "7.2", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0+PTX" + ] + } +} diff --git a/build/torch212-cxx11-cu126-x86_64-linux/paged_attention/__init__.py 
b/build/torch212-cxx11-cu126-x86_64-linux/paged_attention/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch212-cxx11-cu126-x86_64-linux/paged_attention/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch212-cxx11-cu126-x86_64-linux/platforms.py b/build/torch212-cxx11-cu126-x86_64-linux/platforms.py new file mode 100644 index 0000000000000000000000000000000000000000..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 --- /dev/null +++ b/build/torch212-cxx11-cu126-x86_64-linux/platforms.py @@ -0,0 +1,92 @@ +import os +import random +from abc import ABC, abstractmethod +from functools import lru_cache, wraps +from typing import Callable, ParamSpec, TypeVar + +import numpy as np +import torch + +IS_ROCM = torch.version.hip is not None +IS_MPS = torch.backends.mps.is_available() + + +class Platform(ABC): + @classmethod + def seed_everything(cls, seed: int) -> None: + """ + Set the seed of 
each random module. + `torch.manual_seed` will set seed on all devices. + + Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20 + """ + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + @abstractmethod + def get_device_name(self, device_id: int = 0) -> str: ... + + @abstractmethod + def is_cuda(self) -> bool: ... + + @abstractmethod + def is_rocm(self) -> bool: ... + + @abstractmethod + def is_mps(self) -> bool: ... + + +class CudaPlatform(Platform): + @classmethod + @lru_cache(maxsize=8) + def get_device_name(cls, device_id: int = 0) -> str: + return torch.cuda.get_device_name(0) + + def is_cuda(self) -> bool: + return True + + def is_rocm(self) -> bool: + return False + + def is_mps(self) -> bool: + return False + + +class RocmPlatform(Platform): + @classmethod + @lru_cache(maxsize=8) + def get_device_name(cls, device_id: int = 0) -> str: + return torch.cuda.get_device_name(device_id) + + def is_cuda(self) -> bool: + return False + + def is_rocm(self) -> bool: + return True + + def is_mps(self) -> bool: + return False + + +class MpsPlatform(Platform): + @classmethod + @lru_cache(maxsize=8) + def get_device_name(cls, device_id: int = 0) -> str: + return torch.cuda.get_device_name(device_id) + + def is_cuda(self) -> bool: + return False + + def is_rocm(self) -> bool: + return False + + def is_mps(self) -> bool: + return True + +current_platform = ( + RocmPlatform() if IS_ROCM else + MpsPlatform() if IS_MPS else + CudaPlatform() if torch.cuda.is_available() else + None +) diff --git a/build/torch212-cxx11-cu130-x86_64-linux/__init__.py b/build/torch212-cxx11-cu130-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9de56043369487facc1f163df6bd319c9806e5ca --- /dev/null +++ b/build/torch212-cxx11-cu130-x86_64-linux/__init__.py @@ -0,0 +1,21 @@ +from ._custom_ops import ( + convert_fp8, + copy_blocks, + paged_attention_v1, + 
paged_attention_v2, + reshape_and_cache, + reshape_and_cache_flash, + swap_blocks, +) +from ._ops import ops + +__all__ = [ + "convert_fp8", + "copy_blocks", + "ops", + "paged_attention_v1", + "paged_attention_v2", + "reshape_and_cache", + "reshape_and_cache_flash", + "swap_blocks", +] diff --git a/build/torch212-cxx11-cu130-x86_64-linux/_custom_ops.py b/build/torch212-cxx11-cu130-x86_64-linux/_custom_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..a0c0b8db085468dee5100c98d14106a9ee917bf2 --- /dev/null +++ b/build/torch212-cxx11-cu130-x86_64-linux/_custom_ops.py @@ -0,0 +1,173 @@ +from typing import List, Optional + +import torch + +from ._ops import ops + + +# page attention ops +def paged_attention_v1( + out: torch.Tensor, + query: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + num_kv_heads: int, + scale: float, + block_tables: torch.Tensor, + seq_lens: torch.Tensor, + block_size: int, + max_seq_len: int, + alibi_slopes: Optional[torch.Tensor], + kv_cache_dtype: str, + k_scale: float, + v_scale: float, + tp_rank: int = 0, + blocksparse_local_blocks: int = 0, + blocksparse_vert_stride: int = 0, + blocksparse_block_size: int = 64, + blocksparse_head_sliding_step: int = 0, +) -> None: + ops.paged_attention_v1( + out, + query, + key_cache, + value_cache, + num_kv_heads, + scale, + block_tables, + seq_lens, + block_size, + max_seq_len, + alibi_slopes, + kv_cache_dtype, + k_scale, + v_scale, + tp_rank, + blocksparse_local_blocks, + blocksparse_vert_stride, + blocksparse_block_size, + blocksparse_head_sliding_step, + ) + + +def paged_attention_v2( + out: torch.Tensor, + exp_sum: torch.Tensor, + max_logits: torch.Tensor, + tmp_out: torch.Tensor, + query: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + num_kv_heads: int, + scale: float, + block_tables: torch.Tensor, + seq_lens: torch.Tensor, + block_size: int, + max_seq_len: int, + alibi_slopes: Optional[torch.Tensor], + kv_cache_dtype: str, + 
k_scale: float, + v_scale: float, + tp_rank: int = 0, + blocksparse_local_blocks: int = 0, + blocksparse_vert_stride: int = 0, + blocksparse_block_size: int = 64, + blocksparse_head_sliding_step: int = 0, +) -> None: + ops.paged_attention_v2( + out, + exp_sum, + max_logits, + tmp_out, + query, + key_cache, + value_cache, + num_kv_heads, + scale, + block_tables, + seq_lens, + block_size, + max_seq_len, + alibi_slopes, + kv_cache_dtype, + k_scale, + v_scale, + tp_rank, + blocksparse_local_blocks, + blocksparse_vert_stride, + blocksparse_block_size, + blocksparse_head_sliding_step, + ) + + +def reshape_and_cache( + key: torch.Tensor, + value: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + slot_mapping: torch.Tensor, + kv_cache_dtype: str, + k_scale: float, + v_scale: float, +) -> None: + ops.reshape_and_cache( + key, + value, + key_cache, + value_cache, + slot_mapping, + kv_cache_dtype, + k_scale, + v_scale, + ) + + +def reshape_and_cache_flash( + key: torch.Tensor, + value: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + slot_mapping: torch.Tensor, + kv_cache_dtype: str, + k_scale: torch.Tensor, + v_scale: torch.Tensor, +) -> None: + ops.reshape_and_cache_flash( + key, + value, + key_cache, + value_cache, + slot_mapping, + kv_cache_dtype, + k_scale, + v_scale, + ) + + +def copy_blocks( + key_caches: List[torch.Tensor], + value_caches: List[torch.Tensor], + block_mapping: torch.Tensor, +) -> None: + ops.copy_blocks(key_caches, value_caches, block_mapping) + + +def swap_blocks( + src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor +) -> None: + ops.swap_blocks(src, dst, block_mapping) + + +def convert_fp8( + output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8" +) -> None: + ops.convert_fp8(output, input, scale, kv_dtype) + + +__all__ = [ + "convert_fp8", + "paged_attention_v1", + "paged_attention_v2", + "reshape_and_cache", + "copy_blocks", +] diff --git 
a/build/torch212-cxx11-cu130-x86_64-linux/_ops.py b/build/torch212-cxx11-cu130-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..08f25325eaf8e3891c61289578641347f860e4b3 --- /dev/null +++ b/build/torch212-cxx11-cu130-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _paged_attention_cuda_2567cd6 +ops = torch.ops._paged_attention_cuda_2567cd6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_paged_attention_cuda_2567cd6::{op_name}" diff --git a/build/torch212-cxx11-cu130-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so b/build/torch212-cxx11-cu130-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..1d420023f626a166f3cfa92a49669470e917910b --- /dev/null +++ b/build/torch212-cxx11-cu130-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5c10b69f874b366d4c7e245f31f4d8a4dc7ee6a869848e9cc2b4408394426d1 +size 86554152 diff --git a/build/torch212-cxx11-cu130-x86_64-linux/metadata.json b/build/torch212-cxx11-cu130-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..61f5a0521120b48141a0edc597691f9ba67885c7 --- /dev/null +++ b/build/torch212-cxx11-cu130-x86_64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "name": "paged-attention", + "id": "_paged_attention_cuda_2567cd6", + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "11.0", + "12.0+PTX", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch212-cxx11-cu130-x86_64-linux/paged_attention/__init__.py b/build/torch212-cxx11-cu130-x86_64-linux/paged_attention/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ 
b/build/torch212-cxx11-cu130-x86_64-linux/paged_attention/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch212-cxx11-cu130-x86_64-linux/platforms.py b/build/torch212-cxx11-cu130-x86_64-linux/platforms.py new file mode 100644 index 0000000000000000000000000000000000000000..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 --- /dev/null +++ b/build/torch212-cxx11-cu130-x86_64-linux/platforms.py @@ -0,0 +1,92 @@ +import os +import random +from abc import ABC, abstractmethod +from functools import lru_cache, wraps +from typing import Callable, ParamSpec, TypeVar + +import numpy as np +import torch + +IS_ROCM = torch.version.hip is not None +IS_MPS = torch.backends.mps.is_available() + + +class Platform(ABC): + @classmethod + def seed_everything(cls, seed: int) -> None: + """ + Set the seed of each random module. + `torch.manual_seed` will set seed on all devices. 
+ + Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20 + """ + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + @abstractmethod + def get_device_name(self, device_id: int = 0) -> str: ... + + @abstractmethod + def is_cuda(self) -> bool: ... + + @abstractmethod + def is_rocm(self) -> bool: ... + + @abstractmethod + def is_mps(self) -> bool: ... + + +class CudaPlatform(Platform): + @classmethod + @lru_cache(maxsize=8) + def get_device_name(cls, device_id: int = 0) -> str: + return torch.cuda.get_device_name(0) + + def is_cuda(self) -> bool: + return True + + def is_rocm(self) -> bool: + return False + + def is_mps(self) -> bool: + return False + + +class RocmPlatform(Platform): + @classmethod + @lru_cache(maxsize=8) + def get_device_name(cls, device_id: int = 0) -> str: + return torch.cuda.get_device_name(device_id) + + def is_cuda(self) -> bool: + return False + + def is_rocm(self) -> bool: + return True + + def is_mps(self) -> bool: + return False + + +class MpsPlatform(Platform): + @classmethod + @lru_cache(maxsize=8) + def get_device_name(cls, device_id: int = 0) -> str: + return torch.cuda.get_device_name(device_id) + + def is_cuda(self) -> bool: + return False + + def is_rocm(self) -> bool: + return False + + def is_mps(self) -> bool: + return True + +current_platform = ( + RocmPlatform() if IS_ROCM else + MpsPlatform() if IS_MPS else + CudaPlatform() if torch.cuda.is_available() else + None +) diff --git a/build/torch212-cxx11-cu132-x86_64-linux/__init__.py b/build/torch212-cxx11-cu132-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9de56043369487facc1f163df6bd319c9806e5ca --- /dev/null +++ b/build/torch212-cxx11-cu132-x86_64-linux/__init__.py @@ -0,0 +1,21 @@ +from ._custom_ops import ( + convert_fp8, + copy_blocks, + paged_attention_v1, + paged_attention_v2, + reshape_and_cache, + reshape_and_cache_flash, + swap_blocks, 
+) +from ._ops import ops + +__all__ = [ + "convert_fp8", + "copy_blocks", + "ops", + "paged_attention_v1", + "paged_attention_v2", + "reshape_and_cache", + "reshape_and_cache_flash", + "swap_blocks", +] diff --git a/build/torch212-cxx11-cu132-x86_64-linux/_custom_ops.py b/build/torch212-cxx11-cu132-x86_64-linux/_custom_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..a0c0b8db085468dee5100c98d14106a9ee917bf2 --- /dev/null +++ b/build/torch212-cxx11-cu132-x86_64-linux/_custom_ops.py @@ -0,0 +1,173 @@ +from typing import List, Optional + +import torch + +from ._ops import ops + + +# page attention ops +def paged_attention_v1( + out: torch.Tensor, + query: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + num_kv_heads: int, + scale: float, + block_tables: torch.Tensor, + seq_lens: torch.Tensor, + block_size: int, + max_seq_len: int, + alibi_slopes: Optional[torch.Tensor], + kv_cache_dtype: str, + k_scale: float, + v_scale: float, + tp_rank: int = 0, + blocksparse_local_blocks: int = 0, + blocksparse_vert_stride: int = 0, + blocksparse_block_size: int = 64, + blocksparse_head_sliding_step: int = 0, +) -> None: + ops.paged_attention_v1( + out, + query, + key_cache, + value_cache, + num_kv_heads, + scale, + block_tables, + seq_lens, + block_size, + max_seq_len, + alibi_slopes, + kv_cache_dtype, + k_scale, + v_scale, + tp_rank, + blocksparse_local_blocks, + blocksparse_vert_stride, + blocksparse_block_size, + blocksparse_head_sliding_step, + ) + + +def paged_attention_v2( + out: torch.Tensor, + exp_sum: torch.Tensor, + max_logits: torch.Tensor, + tmp_out: torch.Tensor, + query: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + num_kv_heads: int, + scale: float, + block_tables: torch.Tensor, + seq_lens: torch.Tensor, + block_size: int, + max_seq_len: int, + alibi_slopes: Optional[torch.Tensor], + kv_cache_dtype: str, + k_scale: float, + v_scale: float, + tp_rank: int = 0, + blocksparse_local_blocks: int 
= 0, + blocksparse_vert_stride: int = 0, + blocksparse_block_size: int = 64, + blocksparse_head_sliding_step: int = 0, +) -> None: + ops.paged_attention_v2( + out, + exp_sum, + max_logits, + tmp_out, + query, + key_cache, + value_cache, + num_kv_heads, + scale, + block_tables, + seq_lens, + block_size, + max_seq_len, + alibi_slopes, + kv_cache_dtype, + k_scale, + v_scale, + tp_rank, + blocksparse_local_blocks, + blocksparse_vert_stride, + blocksparse_block_size, + blocksparse_head_sliding_step, + ) + + +def reshape_and_cache( + key: torch.Tensor, + value: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + slot_mapping: torch.Tensor, + kv_cache_dtype: str, + k_scale: float, + v_scale: float, +) -> None: + ops.reshape_and_cache( + key, + value, + key_cache, + value_cache, + slot_mapping, + kv_cache_dtype, + k_scale, + v_scale, + ) + + +def reshape_and_cache_flash( + key: torch.Tensor, + value: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + slot_mapping: torch.Tensor, + kv_cache_dtype: str, + k_scale: torch.Tensor, + v_scale: torch.Tensor, +) -> None: + ops.reshape_and_cache_flash( + key, + value, + key_cache, + value_cache, + slot_mapping, + kv_cache_dtype, + k_scale, + v_scale, + ) + + +def copy_blocks( + key_caches: List[torch.Tensor], + value_caches: List[torch.Tensor], + block_mapping: torch.Tensor, +) -> None: + ops.copy_blocks(key_caches, value_caches, block_mapping) + + +def swap_blocks( + src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor +) -> None: + ops.swap_blocks(src, dst, block_mapping) + + +def convert_fp8( + output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8" +) -> None: + ops.convert_fp8(output, input, scale, kv_dtype) + + +__all__ = [ + "convert_fp8", + "paged_attention_v1", + "paged_attention_v2", + "reshape_and_cache", + "copy_blocks", +] diff --git a/build/torch212-cxx11-cu132-x86_64-linux/_ops.py b/build/torch212-cxx11-cu132-x86_64-linux/_ops.py new 
file mode 100644 index 0000000000000000000000000000000000000000..08f25325eaf8e3891c61289578641347f860e4b3 --- /dev/null +++ b/build/torch212-cxx11-cu132-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _paged_attention_cuda_2567cd6 +ops = torch.ops._paged_attention_cuda_2567cd6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_paged_attention_cuda_2567cd6::{op_name}" diff --git a/build/torch212-cxx11-cu132-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so b/build/torch212-cxx11-cu132-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..68b4790057161108dc49ec95f9c4d97132237abe --- /dev/null +++ b/build/torch212-cxx11-cu132-x86_64-linux/_paged_attention_cuda_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e73a4f843a1dbe0a09db2ed7322d4f1c847d9fb6e1e7dd62a3d5cce1e206797 +size 85413864 diff --git a/build/torch212-cxx11-cu132-x86_64-linux/metadata.json b/build/torch212-cxx11-cu132-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..61f5a0521120b48141a0edc597691f9ba67885c7 --- /dev/null +++ b/build/torch212-cxx11-cu132-x86_64-linux/metadata.json @@ -0,0 +1,21 @@ +{ + "name": "paged-attention", + "id": "_paged_attention_cuda_2567cd6", + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "cuda", + "archs": [ + "10.0", + "11.0", + "12.0+PTX", + "7.5", + "8.0", + "8.6", + "8.7", + "8.9", + "9.0" + ] + } +} diff --git a/build/torch212-cxx11-cu132-x86_64-linux/paged_attention/__init__.py b/build/torch212-cxx11-cu132-x86_64-linux/paged_attention/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ b/build/torch212-cxx11-cu132-x86_64-linux/paged_attention/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from 
pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch212-cxx11-cu132-x86_64-linux/platforms.py b/build/torch212-cxx11-cu132-x86_64-linux/platforms.py new file mode 100644 index 0000000000000000000000000000000000000000..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 --- /dev/null +++ b/build/torch212-cxx11-cu132-x86_64-linux/platforms.py @@ -0,0 +1,92 @@ +import os +import random +from abc import ABC, abstractmethod +from functools import lru_cache, wraps +from typing import Callable, ParamSpec, TypeVar + +import numpy as np +import torch + +IS_ROCM = torch.version.hip is not None +IS_MPS = torch.backends.mps.is_available() + + +class Platform(ABC): + @classmethod + def seed_everything(cls, seed: int) -> None: + """ + Set the seed of each random module. + `torch.manual_seed` will set seed on all devices. 
+ + Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20 + """ + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + @abstractmethod + def get_device_name(self, device_id: int = 0) -> str: ... + + @abstractmethod + def is_cuda(self) -> bool: ... + + @abstractmethod + def is_rocm(self) -> bool: ... + + @abstractmethod + def is_mps(self) -> bool: ... + + +class CudaPlatform(Platform): + @classmethod + @lru_cache(maxsize=8) + def get_device_name(cls, device_id: int = 0) -> str: + return torch.cuda.get_device_name(0) + + def is_cuda(self) -> bool: + return True + + def is_rocm(self) -> bool: + return False + + def is_mps(self) -> bool: + return False + + +class RocmPlatform(Platform): + @classmethod + @lru_cache(maxsize=8) + def get_device_name(cls, device_id: int = 0) -> str: + return torch.cuda.get_device_name(device_id) + + def is_cuda(self) -> bool: + return False + + def is_rocm(self) -> bool: + return True + + def is_mps(self) -> bool: + return False + + +class MpsPlatform(Platform): + @classmethod + @lru_cache(maxsize=8) + def get_device_name(cls, device_id: int = 0) -> str: + return torch.cuda.get_device_name(device_id) + + def is_cuda(self) -> bool: + return False + + def is_rocm(self) -> bool: + return False + + def is_mps(self) -> bool: + return True + +current_platform = ( + RocmPlatform() if IS_ROCM else + MpsPlatform() if IS_MPS else + CudaPlatform() if torch.cuda.is_available() else + None +) diff --git a/build/torch212-cxx11-rocm71-x86_64-linux/__init__.py b/build/torch212-cxx11-rocm71-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9de56043369487facc1f163df6bd319c9806e5ca --- /dev/null +++ b/build/torch212-cxx11-rocm71-x86_64-linux/__init__.py @@ -0,0 +1,21 @@ +from ._custom_ops import ( + convert_fp8, + copy_blocks, + paged_attention_v1, + paged_attention_v2, + reshape_and_cache, + reshape_and_cache_flash, + 
swap_blocks, +) +from ._ops import ops + +__all__ = [ + "convert_fp8", + "copy_blocks", + "ops", + "paged_attention_v1", + "paged_attention_v2", + "reshape_and_cache", + "reshape_and_cache_flash", + "swap_blocks", +] diff --git a/build/torch212-cxx11-rocm71-x86_64-linux/_custom_ops.py b/build/torch212-cxx11-rocm71-x86_64-linux/_custom_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..a0c0b8db085468dee5100c98d14106a9ee917bf2 --- /dev/null +++ b/build/torch212-cxx11-rocm71-x86_64-linux/_custom_ops.py @@ -0,0 +1,173 @@ +from typing import List, Optional + +import torch + +from ._ops import ops + + +# page attention ops +def paged_attention_v1( + out: torch.Tensor, + query: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + num_kv_heads: int, + scale: float, + block_tables: torch.Tensor, + seq_lens: torch.Tensor, + block_size: int, + max_seq_len: int, + alibi_slopes: Optional[torch.Tensor], + kv_cache_dtype: str, + k_scale: float, + v_scale: float, + tp_rank: int = 0, + blocksparse_local_blocks: int = 0, + blocksparse_vert_stride: int = 0, + blocksparse_block_size: int = 64, + blocksparse_head_sliding_step: int = 0, +) -> None: + ops.paged_attention_v1( + out, + query, + key_cache, + value_cache, + num_kv_heads, + scale, + block_tables, + seq_lens, + block_size, + max_seq_len, + alibi_slopes, + kv_cache_dtype, + k_scale, + v_scale, + tp_rank, + blocksparse_local_blocks, + blocksparse_vert_stride, + blocksparse_block_size, + blocksparse_head_sliding_step, + ) + + +def paged_attention_v2( + out: torch.Tensor, + exp_sum: torch.Tensor, + max_logits: torch.Tensor, + tmp_out: torch.Tensor, + query: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + num_kv_heads: int, + scale: float, + block_tables: torch.Tensor, + seq_lens: torch.Tensor, + block_size: int, + max_seq_len: int, + alibi_slopes: Optional[torch.Tensor], + kv_cache_dtype: str, + k_scale: float, + v_scale: float, + tp_rank: int = 0, + 
blocksparse_local_blocks: int = 0, + blocksparse_vert_stride: int = 0, + blocksparse_block_size: int = 64, + blocksparse_head_sliding_step: int = 0, +) -> None: + ops.paged_attention_v2( + out, + exp_sum, + max_logits, + tmp_out, + query, + key_cache, + value_cache, + num_kv_heads, + scale, + block_tables, + seq_lens, + block_size, + max_seq_len, + alibi_slopes, + kv_cache_dtype, + k_scale, + v_scale, + tp_rank, + blocksparse_local_blocks, + blocksparse_vert_stride, + blocksparse_block_size, + blocksparse_head_sliding_step, + ) + + +def reshape_and_cache( + key: torch.Tensor, + value: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + slot_mapping: torch.Tensor, + kv_cache_dtype: str, + k_scale: float, + v_scale: float, +) -> None: + ops.reshape_and_cache( + key, + value, + key_cache, + value_cache, + slot_mapping, + kv_cache_dtype, + k_scale, + v_scale, + ) + + +def reshape_and_cache_flash( + key: torch.Tensor, + value: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + slot_mapping: torch.Tensor, + kv_cache_dtype: str, + k_scale: torch.Tensor, + v_scale: torch.Tensor, +) -> None: + ops.reshape_and_cache_flash( + key, + value, + key_cache, + value_cache, + slot_mapping, + kv_cache_dtype, + k_scale, + v_scale, + ) + + +def copy_blocks( + key_caches: List[torch.Tensor], + value_caches: List[torch.Tensor], + block_mapping: torch.Tensor, +) -> None: + ops.copy_blocks(key_caches, value_caches, block_mapping) + + +def swap_blocks( + src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor +) -> None: + ops.swap_blocks(src, dst, block_mapping) + + +def convert_fp8( + output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8" +) -> None: + ops.convert_fp8(output, input, scale, kv_dtype) + + +__all__ = [ + "convert_fp8", + "paged_attention_v1", + "paged_attention_v2", + "reshape_and_cache", + "copy_blocks", +] diff --git a/build/torch212-cxx11-rocm71-x86_64-linux/_ops.py 
b/build/torch212-cxx11-rocm71-x86_64-linux/_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..da470d69423ba6440c271ea421c44bf1d3726853 --- /dev/null +++ b/build/torch212-cxx11-rocm71-x86_64-linux/_ops.py @@ -0,0 +1,9 @@ +import torch +from . import _paged_attention_rocm_2567cd6 +ops = torch.ops._paged_attention_rocm_2567cd6 + +def add_op_namespace_prefix(op_name: str): + """ + Prefix op by namespace. + """ + return f"_paged_attention_rocm_2567cd6::{op_name}" diff --git a/build/torch212-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so b/build/torch212-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so new file mode 100644 index 0000000000000000000000000000000000000000..3cc4aee1ae8cc35d71e58bc6feab7d7d79e70ed7 --- /dev/null +++ b/build/torch212-cxx11-rocm71-x86_64-linux/_paged_attention_rocm_2567cd6.abi3.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce8a2383c147191399662d480b1497730070d1a4d2ade1a88da8bd6956703c00 +size 84908504 diff --git a/build/torch212-cxx11-rocm71-x86_64-linux/metadata.json b/build/torch212-cxx11-rocm71-x86_64-linux/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5a8c4d218743dcdd3752770553dfaf6cfc59ae12 --- /dev/null +++ b/build/torch212-cxx11-rocm71-x86_64-linux/metadata.json @@ -0,0 +1,22 @@ +{ + "name": "paged-attention", + "id": "_paged_attention_rocm_2567cd6", + "version": 1, + "license": "Apache-2.0", + "python-depends": [], + "backend": { + "type": "rocm", + "archs": [ + "gfx1030", + "gfx1100", + "gfx1101", + "gfx1200", + "gfx1201", + "gfx906", + "gfx908", + "gfx90a", + "gfx942", + "gfx950" + ] + } +} diff --git a/build/torch212-cxx11-rocm71-x86_64-linux/paged_attention/__init__.py b/build/torch212-cxx11-rocm71-x86_64-linux/paged_attention/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..a9b2672c1cd85b74c1b3ded0fc0b2100e1aeac23 --- /dev/null +++ 
b/build/torch212-cxx11-rocm71-x86_64-linux/paged_attention/__init__.py @@ -0,0 +1,26 @@ +import ctypes +import importlib.util +import sys +from pathlib import Path +from types import ModuleType + + +def _import_from_path(file_path: Path) -> ModuleType: + # We cannot use the module name as-is, after adding it to `sys.modules`, + # it would also be used for other imports. So, we make a module name that + # depends on the path for it to be unique using the hex-encoded hash of + # the path. + path_hash = "{:x}".format(ctypes.c_size_t(hash(file_path.absolute())).value) + module_name = path_hash + spec = importlib.util.spec_from_file_location(module_name, file_path) + if spec is None: + raise ImportError(f"Cannot load spec for {module_name} from {file_path}") + module = importlib.util.module_from_spec(spec) + if module is None: + raise ImportError(f"Cannot load module {module_name} from spec") + sys.modules[module_name] = module + spec.loader.exec_module(module) # type: ignore + return module + + +globals().update(vars(_import_from_path(Path(__file__).parent.parent / "__init__.py"))) diff --git a/build/torch212-cxx11-rocm71-x86_64-linux/platforms.py b/build/torch212-cxx11-rocm71-x86_64-linux/platforms.py new file mode 100644 index 0000000000000000000000000000000000000000..6277d5f50ff3ddc265bb39fa1c4d17e0341b7767 --- /dev/null +++ b/build/torch212-cxx11-rocm71-x86_64-linux/platforms.py @@ -0,0 +1,92 @@ +import os +import random +from abc import ABC, abstractmethod +from functools import lru_cache, wraps +from typing import Callable, ParamSpec, TypeVar + +import numpy as np +import torch + +IS_ROCM = torch.version.hip is not None +IS_MPS = torch.backends.mps.is_available() + + +class Platform(ABC): + @classmethod + def seed_everything(cls, seed: int) -> None: + """ + Set the seed of each random module. + `torch.manual_seed` will set seed on all devices. 
+ + Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20 + """ + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + @abstractmethod + def get_device_name(self, device_id: int = 0) -> str: ... + + @abstractmethod + def is_cuda(self) -> bool: ... + + @abstractmethod + def is_rocm(self) -> bool: ... + + @abstractmethod + def is_mps(self) -> bool: ... + + +class CudaPlatform(Platform): + @classmethod + @lru_cache(maxsize=8) + def get_device_name(cls, device_id: int = 0) -> str: + return torch.cuda.get_device_name(0) + + def is_cuda(self) -> bool: + return True + + def is_rocm(self) -> bool: + return False + + def is_mps(self) -> bool: + return False + + +class RocmPlatform(Platform): + @classmethod + @lru_cache(maxsize=8) + def get_device_name(cls, device_id: int = 0) -> str: + return torch.cuda.get_device_name(device_id) + + def is_cuda(self) -> bool: + return False + + def is_rocm(self) -> bool: + return True + + def is_mps(self) -> bool: + return False + + +class MpsPlatform(Platform): + @classmethod + @lru_cache(maxsize=8) + def get_device_name(cls, device_id: int = 0) -> str: + return torch.cuda.get_device_name(device_id) + + def is_cuda(self) -> bool: + return False + + def is_rocm(self) -> bool: + return False + + def is_mps(self) -> bool: + return True + +current_platform = ( + RocmPlatform() if IS_ROCM else + MpsPlatform() if IS_MPS else + CudaPlatform() if torch.cuda.is_available() else + None +) diff --git a/build/torch212-cxx11-rocm72-x86_64-linux/__init__.py b/build/torch212-cxx11-rocm72-x86_64-linux/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9de56043369487facc1f163df6bd319c9806e5ca --- /dev/null +++ b/build/torch212-cxx11-rocm72-x86_64-linux/__init__.py @@ -0,0 +1,21 @@ +from ._custom_ops import ( + convert_fp8, + copy_blocks, + paged_attention_v1, + paged_attention_v2, + reshape_and_cache, + reshape_and_cache_flash, + 
swap_blocks, +) +from ._ops import ops + +__all__ = [ + "convert_fp8", + "copy_blocks", + "ops", + "paged_attention_v1", + "paged_attention_v2", + "reshape_and_cache", + "reshape_and_cache_flash", + "swap_blocks", +] diff --git a/build/torch212-cxx11-rocm72-x86_64-linux/_custom_ops.py b/build/torch212-cxx11-rocm72-x86_64-linux/_custom_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..a0c0b8db085468dee5100c98d14106a9ee917bf2 --- /dev/null +++ b/build/torch212-cxx11-rocm72-x86_64-linux/_custom_ops.py @@ -0,0 +1,173 @@ +from typing import List, Optional + +import torch + +from ._ops import ops + + +# page attention ops +def paged_attention_v1( + out: torch.Tensor, + query: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + num_kv_heads: int, + scale: float, + block_tables: torch.Tensor, + seq_lens: torch.Tensor, + block_size: int, + max_seq_len: int, + alibi_slopes: Optional[torch.Tensor], + kv_cache_dtype: str, + k_scale: float, + v_scale: float, + tp_rank: int = 0, + blocksparse_local_blocks: int = 0, + blocksparse_vert_stride: int = 0, + blocksparse_block_size: int = 64, + blocksparse_head_sliding_step: int = 0, +) -> None: + ops.paged_attention_v1( + out, + query, + key_cache, + value_cache, + num_kv_heads, + scale, + block_tables, + seq_lens, + block_size, + max_seq_len, + alibi_slopes, + kv_cache_dtype, + k_scale, + v_scale, + tp_rank, + blocksparse_local_blocks, + blocksparse_vert_stride, + blocksparse_block_size, + blocksparse_head_sliding_step, + ) + + +def paged_attention_v2( + out: torch.Tensor, + exp_sum: torch.Tensor, + max_logits: torch.Tensor, + tmp_out: torch.Tensor, + query: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + num_kv_heads: int, + scale: float, + block_tables: torch.Tensor, + seq_lens: torch.Tensor, + block_size: int, + max_seq_len: int, + alibi_slopes: Optional[torch.Tensor], + kv_cache_dtype: str, + k_scale: float, + v_scale: float, + tp_rank: int = 0, + 
blocksparse_local_blocks: int = 0, + blocksparse_vert_stride: int = 0, + blocksparse_block_size: int = 64, + blocksparse_head_sliding_step: int = 0, +) -> None: + ops.paged_attention_v2( + out, + exp_sum, + max_logits, + tmp_out, + query, + key_cache, + value_cache, + num_kv_heads, + scale, + block_tables, + seq_lens, + block_size, + max_seq_len, + alibi_slopes, + kv_cache_dtype, + k_scale, + v_scale, + tp_rank, + blocksparse_local_blocks, + blocksparse_vert_stride, + blocksparse_block_size, + blocksparse_head_sliding_step, + ) + + +def reshape_and_cache( + key: torch.Tensor, + value: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + slot_mapping: torch.Tensor, + kv_cache_dtype: str, + k_scale: float, + v_scale: float, +) -> None: + ops.reshape_and_cache( + key, + value, + key_cache, + value_cache, + slot_mapping, + kv_cache_dtype, + k_scale, + v_scale, + ) + + +def reshape_and_cache_flash( + key: torch.Tensor, + value: torch.Tensor, + key_cache: torch.Tensor, + value_cache: torch.Tensor, + slot_mapping: torch.Tensor, + kv_cache_dtype: str, + k_scale: torch.Tensor, + v_scale: torch.Tensor, +) -> None: + ops.reshape_and_cache_flash( + key, + value, + key_cache, + value_cache, + slot_mapping, + kv_cache_dtype, + k_scale, + v_scale, + ) + + +def copy_blocks( + key_caches: List[torch.Tensor], + value_caches: List[torch.Tensor], + block_mapping: torch.Tensor, +) -> None: + ops.copy_blocks(key_caches, value_caches, block_mapping) + + +def swap_blocks( + src: torch.Tensor, dst: torch.Tensor, block_mapping: torch.Tensor +) -> None: + ops.swap_blocks(src, dst, block_mapping) + + +def convert_fp8( + output: torch.Tensor, input: torch.Tensor, scale: float = 1.0, kv_dtype: str = "fp8" +) -> None: + ops.convert_fp8(output, input, scale, kv_dtype) + + +__all__ = [ + "convert_fp8", + "paged_attention_v1", + "paged_attention_v2", + "reshape_and_cache", + "copy_blocks", +] diff --git a/build/torch212-cxx11-rocm72-x86_64-linux/_ops.py 
def add_op_namespace_prefix(op_name: str):
    """
    Return ``op_name`` qualified with this build's op namespace,
    i.e. ``<namespace>::<op_name>``.
    """
    namespace = "_paged_attention_rocm_2567cd6"
    return f"{namespace}::{op_name}"
def _import_from_path(file_path: Path) -> ModuleType:
    """Load the Python file at ``file_path`` as a uniquely named module.

    Args:
        file_path: Location of the source file to execute.

    Returns:
        The executed module, which is also registered in ``sys.modules``.

    Raises:
        ImportError: If no usable import spec/loader exists for the file.
        Exception: Whatever the module's own top-level code raises; in that
            case the module is removed from ``sys.modules`` again.
    """
    # We cannot use the module name as-is, after adding it to `sys.modules`,
    # it would also be used for other imports. So, we make a module name that
    # depends on the path for it to be unique using the hex-encoded hash of
    # the path.
    module_name = f"{ctypes.c_size_t(hash(file_path.absolute())).value:x}"
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot load spec for {module_name} from {file_path}")
    # `module_from_spec` raises on failure and never returns None, so no
    # further check is needed here.
    module = importlib.util.module_from_spec(spec)
    # Register before executing so the module can be found while its own
    # top-level code runs.
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind in `sys.modules`.
        sys.modules.pop(module_name, None)
        raise
    return module
# Backend detection flags, evaluated once at import time.
IS_ROCM = torch.version.hip is not None
IS_MPS = torch.backends.mps.is_available()


class Platform(ABC):
    """Interface for querying the accelerator backend in use."""

    @classmethod
    def seed_everything(cls, seed: int) -> None:
        """
        Set the seed of each random module.
        `torch.manual_seed` will set seed on all devices.

        Loosely based on: https://github.com/Lightning-AI/pytorch-lightning/blob/2.4.0/src/lightning/fabric/utilities/seed.py#L20
        """
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)

    @abstractmethod
    def get_device_name(self, device_id: int = 0) -> str: ...

    @abstractmethod
    def is_cuda(self) -> bool: ...

    @abstractmethod
    def is_rocm(self) -> bool: ...

    @abstractmethod
    def is_mps(self) -> bool: ...


class CudaPlatform(Platform):
    """Platform implementation selected when CUDA is available."""

    @classmethod
    @lru_cache(maxsize=8)
    def get_device_name(cls, device_id: int = 0) -> str:
        """Return the name of the CUDA device with index ``device_id``."""
        # Query the requested device: the cache is keyed by `device_id`, so
        # always asking for device 0 would report the wrong name for every
        # other index.
        return torch.cuda.get_device_name(device_id)

    def is_cuda(self) -> bool:
        return True

    def is_rocm(self) -> bool:
        return False

    def is_mps(self) -> bool:
        return False


class RocmPlatform(Platform):
    """Platform implementation selected on ROCm (HIP) builds of PyTorch."""

    @classmethod
    @lru_cache(maxsize=8)
    def get_device_name(cls, device_id: int = 0) -> str:
        """Return the name of device ``device_id`` via the ``torch.cuda`` API."""
        return torch.cuda.get_device_name(device_id)

    def is_cuda(self) -> bool:
        return False

    def is_rocm(self) -> bool:
        return True

    def is_mps(self) -> bool:
        return False


class MpsPlatform(Platform):
    """Platform implementation selected when the MPS backend is available."""

    @classmethod
    @lru_cache(maxsize=8)
    def get_device_name(cls, device_id: int = 0) -> str:
        """Return a torch-style device string (``"mps:<device_id>"``)."""
        # `torch.cuda` cannot be queried on a host that only has the Metal
        # backend, so report the torch device string rather than raising.
        return f"mps:{device_id}"

    def is_cuda(self) -> bool:
        return False

    def is_rocm(self) -> bool:
        return False

    def is_mps(self) -> bool:
        return True


# Pick the active backend once at import time. ROCm is checked first, then
# MPS, then CUDA; ``None`` means no supported accelerator was detected.
current_platform = (
    RocmPlatform() if IS_ROCM else
    MpsPlatform() if IS_MPS else
    CudaPlatform() if torch.cuda.is_available() else
    None
)