Upload 4 files
Browse files- .gitattributes +1 -0
- triton/VC_redist.x64.exe +3 -0
- triton/python_3.11.9_include_libs.zip +3 -0
- triton/python_3.12.7_include_libs.zip +3 -0
- triton/test_triton.py +27 -0
.gitattributes
CHANGED
|
@@ -44,3 +44,4 @@ insightface/insightface-0.7.3-cp311-cp311-win_amd64.whl filter=lfs diff=lfs merg
|
|
| 44 |
insightface/insightface-0.7.3-cp312-cp312-win_amd64.whl filter=lfs diff=lfs merge=lfs -text
|
| 45 |
insightface/insightface-0.7.3-cp313-cp313-win_amd64.whl filter=lfs diff=lfs merge=lfs -text
|
| 46 |
insightface/insightface-0.7.3-cp39-cp39-win_amd64.whl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 44 |
insightface/insightface-0.7.3-cp312-cp312-win_amd64.whl filter=lfs diff=lfs merge=lfs -text
|
| 45 |
insightface/insightface-0.7.3-cp313-cp313-win_amd64.whl filter=lfs diff=lfs merge=lfs -text
|
| 46 |
insightface/insightface-0.7.3-cp39-cp39-win_amd64.whl filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
triton/VC_redist.x64.exe filter=lfs diff=lfs merge=lfs -text
|
triton/VC_redist.x64.exe
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc0ff0eb1dc3f5188ae6300faef32bf5beeba4bdd6e8e445a9184072096b713b
|
| 3 |
+
size 25635768
|
triton/python_3.11.9_include_libs.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2cc1d8e2cad23401c152d690669605d4356e12c638954d1e9231b85e1c79966
|
| 3 |
+
size 372089
|
triton/python_3.12.7_include_libs.zip
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:145cde69c753dd5a78f4525ef1f5cbe0f2642678fdefd9e79e2bbea1a91d80ea
|
| 3 |
+
size 437666
|
triton/test_triton.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import triton
|
| 3 |
+
import triton.language as tl
|
| 4 |
+
|
@triton.jit
def add_kernel(x_ptr, y_ptr, output_ptr, n_elements, BLOCK_SIZE: tl.constexpr):
    """Element-wise vector addition: output[i] = x[i] + y[i] for i < n_elements.

    Each program instance in the 1-D launch grid handles one contiguous
    chunk of BLOCK_SIZE elements.
    """
    # Which chunk this program instance is responsible for.
    pid = tl.program_id(axis=0)
    chunk_start = pid * BLOCK_SIZE
    idx = chunk_start + tl.arange(0, BLOCK_SIZE)
    # Guard the final chunk: lanes whose index falls past n_elements are
    # masked out of both the loads and the store.
    in_bounds = idx < n_elements
    lhs = tl.load(x_ptr + idx, mask=in_bounds)
    rhs = tl.load(y_ptr + idx, mask=in_bounds)
    tl.store(output_ptr + idx, lhs + rhs, mask=in_bounds)
| 15 |
+
|
def add(x: torch.Tensor, y: torch.Tensor):
    """Return the element-wise sum of *x* and *y* via the Triton add_kernel.

    Assumes x and y are same-shaped CUDA tensors (the caller below creates
    them with device="cuda" — confirm before reusing elsewhere).
    """
    result = torch.empty_like(x)
    total = result.numel()

    # Grid size is derived from the BLOCK_SIZE meta-parameter at launch
    # time: one program per BLOCK_SIZE-sized chunk, rounded up.
    def grid(meta):
        return (triton.cdiv(total, meta["BLOCK_SIZE"]),)

    add_kernel[grid](x, y, result, total, BLOCK_SIZE=1024)
    return result
| 22 |
+
|
# Smoke test: compare the Triton kernel against eager PyTorch addition.
a = torch.rand(3, device="cuda")
b = a + a               # reference result computed by PyTorch
b_compiled = add(a, a)  # same computation through the Triton kernel
diff = b_compiled - b   # should be exactly zero everywhere
print(diff)
print("If you see tensor([0., 0., 0.], device='cuda:0'), then it works")