Commit
·
42f4907
1
Parent(s):
4ee9d9e
Added `num_stages` tuning to the Triton autotune configs
Browse files
- compressed_attention.py +2 -1
compressed_attention.py
CHANGED
|
@@ -28,8 +28,9 @@ IS_HOPPER_GPU = is_hopper_gpu()
|
|
| 28 |
|
| 29 |
@triton.autotune(
|
| 30 |
configs=[
|
| 31 |
-
triton.Config({}, num_warps=num_warps)
|
| 32 |
for num_warps in [1, 2, 4, 8]
|
|
|
|
| 33 |
],
|
| 34 |
key=['HEAD_DIM', 'BLOCK_SIZE_Q', 'BLOCK_SIZE_K', 'BLOCK_SIZE_V'],
|
| 35 |
)
|
|
|
|
| 28 |
|
| 29 |
@triton.autotune(
|
| 30 |
configs=[
|
| 31 |
+
triton.Config({}, num_warps=num_warps, num_stages=num_stages)
|
| 32 |
for num_warps in [1, 2, 4, 8]
|
| 33 |
+
for num_stages in [1, 2, 3]
|
| 34 |
],
|
| 35 |
key=['HEAD_DIM', 'BLOCK_SIZE_Q', 'BLOCK_SIZE_K', 'BLOCK_SIZE_V'],
|
| 36 |
)
|